diff --git a/client/lib/sort_preference.dart b/client/lib/sort_preference.dart
index af87c708..749dbd7c 100644
--- a/client/lib/sort_preference.dart
+++ b/client/lib/sort_preference.dart
@@ -25,8 +25,7 @@ class SortMode extends ChangeNotifier {
   }

   // This type should not be a widget(e.g. Icon) because of mockito support...
-  IconData get icon =>
-      _isDescOrder ? Icons.arrow_upward : Icons.arrow_downward;
+  IconData get icon => _isDescOrder ? Icons.arrow_upward : Icons.arrow_downward;

   // This type should not be a widget(e.g. Text) because of mockito support...
   String get text => _isDescOrder ? "오름차순으로" : "내림차순으로";
diff --git a/dbctl/README.md b/dbctl/README.md
index ad3fd1bd..ec884d3d 100644
--- a/dbctl/README.md
+++ b/dbctl/README.md
@@ -1,46 +1,25 @@
-# Metadata manager
-
-## 1. Generate a metadata from the mapping table
-
-MappingTable(database.proto)
-
-```proto
-message MappingTable {
-  repeated MappingTableRow rows = 1;
-}
-
-message MappingTableRow {
-  int32 pr_id = 1;
-  repeated string paper_arxiv_id = 2;
-  string youtube_video_id = 3;
-}
-```
-
-Database(database.proto)
-
-```proto
-message Database {
-  map pr_id_to_video = 1;
-}
-
-// PR영상의 하나의 레코드
-message PrVideo {
-  int32 pr_id = 1;
-  // 관련도에 따라 정렬
-  repeated Paper papers = 2;
-  YouTubeVideo video = 3;
-}
-
-// 유튜브 1편에 대한 정보
-// `pkg.pr12er.Video` 생성하기 위해 사용됩니다.
-message YouTubeVideo {
-  string video_id = 1;
-  string video_title = 2;
-  int64 number_of_likes = 3;
-  int64 number_of_views = 4;
-  google.protobuf.Timestamp published_date = 5;
-  string uploader = 6;
-}
-```
-
-2.
+# dbctl
+
+`dbctl` is used to update the database files.
+
+## Adding a new PR video
+
+1. Obtain the YouTube URL of the PR video.
+2. Run the script below (assuming bash).
+
+   ```bash
+   youtube_links=(
+     https://youtu.be/-5fFL68d7Gg
+     https://youtu.be/I9kQwMbpxuE
+     https://youtu.be/HWf8CmTAIR4
+     https://youtu.be/FFXAm2uTmeI
+     https://youtu.be/20kxrS2yglg
+     https://youtu.be/BZwUR9hvBPE
+     https://youtu.be/hptinxZIXT4
+   )
+
+   for youtube in "${youtube_links[@]}"
+   do
+     go run main.go youtube --youtube-link "$youtube" >> ../server/internal/data/mapping_table.pbtxt
+   done
+   ```
diff --git a/dbctl/internal/transform/transform.go b/dbctl/internal/transform/transform.go
index 9495b735..1f24058e 100644
--- a/dbctl/internal/transform/transform.go
+++ b/dbctl/internal/transform/transform.go
@@ -93,10 +93,18 @@ func ExtractPaperIDs(title string) ([]string, error) {
 	}

 	var paperIDs []string
+	// Track arXiv IDs already seen so duplicate search results are skipped.
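+	//
+	// For example (hypothetical search results): https://arxiv.org/abs/1406.2661
+	// and https://arxiv.org/pdf/1406.2661 would both resolve to the arXiv ID
+	// "1406.2661", so only its first occurrence is appended to paperIDs.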
+ seenPaperID := map[string]bool{} for i := 0; i < maxLen; i++ { arxivID, _ := ExtractArxivIDFromURL(search[i].URL) + + if seenPaperID[arxivID] { + continue + } + paperIDs = append(paperIDs, arxivID) + seenPaperID[arxivID] = true } return paperIDs, nil diff --git a/server/internal/data/database.pbtxt b/server/internal/data/database.pbtxt index aea243a1..4fade02c 100644 --- a/server/internal/data/database.pbtxt +++ b/server/internal/data/database.pbtxt @@ -19,68 +19,70 @@ pr_id_to_video: { authors: "Aaron Courville" authors: "Yoshua Bengio" repositories: { - url: "https://github.com/jskDr/keraspp_2021" - owner: "jskDr" + url: "https://github.com/harish678/PyTorch-Lightning-Examples/blob/master/gan_lightning.py" + owner: "master" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Models implemented in PyTorch Lightning" } repositories: { - url: "https://github.com/JaryV/CycleGAN_OldYoung" - owner: "JaryV" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/syahdeini/gan" + owner: "syahdeini" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 + description: "understanding generative adversarial networks (GAN) by code" } repositories: { - url: "https://github.com/asiltureli/gan-in-colab" - owner: "asiltureli" + url: "https://github.com/jtiger958/pytorch-computer-vision-basic" + owner: "jtiger958" framework: FRAMEWORK_PYTORCH - description: "GAN implementations on Google Colab" + number_of_stars: 8 } repositories: { - url: "https://github.com/rohitkuk/AnimeGAN" - owner: "rohitkuk" + url: "https://github.com/avillemin/GANs" + owner: "avillemin" framework: FRAMEWORK_PYTORCH - number_of_stars: 17 - description: "Generating Anime Images by Implementing Deep Convolutional Generative Adversarial Networks paper " + number_of_stars: 2 + description: "Generative Adversarial Networks" } repositories: { - url: "https://github.com/ddehueck/pytorch-GAN" - owner: "ddehueck" + url: "https://github.com/ConstantinLC/GAN-Implementations" + owner: "ConstantinLC" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "PyTorch implementation of the original GAN paper by Goodfellow et al." } repositories: { - url: "https://github.com/roberttwomey/machine-imagination-workshop" - owner: "roberttwomey" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" + url: "https://github.com/lilianweng/unified-gan-tensorflow" + owner: "lilianweng" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 94 + description: "A Tensorflow implementation of GAN, WGAN and WGAN with gradient penalty." } repositories: { - url: "https://github.com/MaximeVandegar/Papers-in-100-Lines-of-Code" - owner: "MaximeVandegar" + url: "https://github.com/suzana-ilic/pytorch_DCGANs" + owner: "suzana-ilic" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Implementation of papers in 100 lines of code." 
+ number_of_stars: 6 + description: "Training Deep Convolutional Generative Adversarial Networks (DCGANs): Experimenting with small datasets, colors and patterns" } repositories: { - url: "https://github.com/dhrim/andong_2021" - owner: "dhrim" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 + url: "https://github.com/lyeoni/pytorch-mnist-GAN" + owner: "lyeoni" + framework: FRAMEWORK_PYTORCH + number_of_stars: 30 } repositories: { - url: "https://github.com/lab-ml/annotated_deep_learning_paper_implementations/tree/master/labml_nn/gan/original" - owner: "gan" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + url: "https://github.com/qinpengzhi/myDCGAN" + owner: "qinpengzhi" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "DCNAN论文的实现(参考大神的代码,基本只是自己把大神代码重新理解了一遍)" } repositories: { - url: "https://github.com/jhKessler/Progressively-Growing-Generative-Adverserial-Network" - owner: "jhKessler" + url: "https://github.com/nsom/Basic-Models" + owner: "nsom" framework: FRAMEWORK_PYTORCH - description: "Generative Adverserial Network for Image Generation" + number_of_stars: 1 + description: "My Pytorch Implementation of some basic models." } methods: { name: "GAN" @@ -96,8 +98,8 @@ pr_id_to_video: { video: { video_id: "L3hz57whyNw" video_title: "PR-001: Generative adversarial nets by Jaejun Yoo (2017/4/13)" - number_of_likes: 256 - number_of_views: 34726 + number_of_likes: 263 + number_of_views: 35043 published_date: { seconds: 1492839397 } @@ -125,71 +127,64 @@ pr_id_to_video: { authors: "Han Hu" authors: "Yichen Wei" repositories: { - url: "https://github.com/ximilar-com/xcenternet" - owner: "ximilar-com" + url: "https://github.com/NVIDIAAICITYCHALLENGE/AICity_Team6_ISU" + owner: "NVIDIAAICITYCHALLENGE" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 131 - description: "Fast anchor free Object Detection based on CenterNet (Objects As Points) and TTFNet (Training-Time-Friendly Network). Implemented in TensorFlow 2.4+." 
+ number_of_stars: 20 + description: "Source code and code description of Team6_ISU for NVIDIA AICity Challenge 2017 track 1" } repositories: { - url: "https://github.com/esw0116/DynaVSR" - owner: "esw0116" - framework: FRAMEWORK_PYTORCH - number_of_stars: 42 - description: "DynaVSR: Dynamic Adaptive Blind VideoSuper-Resolution" + url: "https://github.com/qilei123/fpn_crop" + owner: "qilei123" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } repositories: { - url: "https://github.com/bkvie/Locally-Consistent-Deformable-Convolution" - owner: "bkvie" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Locally Consistent Deformable Convolution as part of deformable flow" + url: "https://github.com/qilei123/DEEPLAB_4_RETINAIMG" + owner: "qilei123" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/zhusiling/EDVR" - owner: "zhusiling" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + url: "https://github.com/guanfuchen/Deformable-ConvNets" + owner: "guanfuchen" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "原始仓库Deformable-ConvNets代码注释" } repositories: { - url: "https://github.com/TangDL/DCN" - owner: "TangDL" - framework: FRAMEWORK_TENSORFLOW - description: "DCN" + url: "https://github.com/hangg7/deformable-kernels" + owner: "hangg7" + framework: FRAMEWORK_PYTORCH + number_of_stars: 178 + description: "Deforming kernels to adapt towards object deformation. In ICLR 2020." } repositories: { - url: "https://github.com/tianhai123/deform-conv" - owner: "tianhai123" + url: "https://github.com/qilei123/DEEPLAB_4_RETINA" + owner: "qilei123" framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 } repositories: { - url: "https://github.com/necla-ml/Deformable-ConvNets-py3" - owner: "necla-ml" + url: "https://github.com/fourmi1995/IronExperiment-DCN" + owner: "fourmi1995" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Forked Deformable ConvNets for Python 3" + number_of_stars: 2 + description: "基于DCN(Faster R-CNN,RFCN的钢铁检测实验)" } repositories: { - is_official: true - url: "https://github.com/msracver/Deformable-ConvNets" - owner: "msracver" + url: "https://github.com/qilei123/DeformableConvV2" + owner: "qilei123" framework: FRAMEWORK_OTHERS - number_of_stars: 3537 - description: "Deformable Convolutional Networks" } repositories: { - url: "https://github.com/NVIDIAAICITYCHALLENGE/AICity_Team6_ISU" - owner: "NVIDIAAICITYCHALLENGE" + url: "https://github.com/zengzhaoyang/Weak_Detection" + owner: "zengzhaoyang" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 20 - description: "Source code and code description of Team6_ISU for NVIDIA AICity Challenge 2017 track 1" } repositories: { - url: "https://github.com/qilei123/fpn_crop" - owner: "qilei123" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 + url: "https://github.com/chenbys/GuidedOffset" + owner: "chenbys" + framework: FRAMEWORK_TENSORFLOW } methods: { name: "1x1 Convolution" @@ -246,7 +241,7 @@ pr_id_to_video: { video_id: "RRwaz0fBQ0Y" video_title: "PR-002: Deformable Convolutional Networks (2017)" number_of_likes: 110 - number_of_views: 14503 + number_of_views: 14617 published_date: { seconds: 1492352642 } @@ -273,44 +268,11 @@ pr_id_to_video: { authors: "Fethi Bougares" authors: "Holger Schwenk" authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/park-cheol/pytorch_seq2seq-new-" - owner: "park-cheol" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/roomylee/rnn-text-classification-tf" - owner: "roomylee" - 
framework: FRAMEWORK_TENSORFLOW - number_of_stars: 114 - description: "Tensorflow Implementation of Recurrent Neural Network (Vanilla, LSTM, GRU) for Text Classification" - } - repositories: { - url: "https://github.com/dewanderelex/LanguageTranslation" - owner: "dewanderelex" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/magahub/songrnn" - owner: "magahub" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/starry91/NMT-Lab" - owner: "starry91" - framework: FRAMEWORK_OTHERS - description: "Implementation of Neural machine translation papers" - } - repositories: { - url: "https://github.com/munir-bd/Korean-POS-Tagger-LSTM" - owner: "munir-bd" - framework: FRAMEWORK_OTHERS - } repositories: { url: "https://github.com/trevor-richardson/rnn_zoo" owner: "trevor-richardson" framework: FRAMEWORK_PYTORCH - number_of_stars: 9 + number_of_stars: 10 description: "This repository tests various recurrent neural network architectures on baseline datasets SeqMNIST and pMNIST." } repositories: { @@ -331,9 +293,49 @@ pr_id_to_video: { url: "https://github.com/farizrahman4u/seq2seq" owner: "farizrahman4u" framework: FRAMEWORK_OTHERS - number_of_stars: 3079 + number_of_stars: 3086 description: "Sequence to Sequence Learning with Keras" } + repositories: { + url: "https://github.com/littleflow3r/Sequence_to_sequence_learning_for_machine_translation" + owner: "littleflow3r" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Pytorch implementation of several seq2seq models (Machine translation task, Japanese-English)" + } + repositories: { + url: "https://github.com/graykode/nlp-tutorial" + owner: "graykode" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9224 + description: "Natural Language Processing Tutorial for Deep Learning Researchers" + } + repositories: { + url: "https://github.com/jmyrberg/finnlem" + owner: "jmyrberg" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Neural network based lemmatizer for Finnish language" + } + repositories: { + url: "https://github.com/spratskevich/Lemmatizer" + owner: "spratskevich" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/erickrf/autoencoder" + owner: "erickrf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 229 + description: "Text autoencoder with LSTMs" + } + repositories: { + url: "https://github.com/littleflow3r/Sequence_to_sequence_learning_with_NN" + owner: "littleflow3r" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Pytorch implementation of several seq2seq models (Machine translation task, Japanese-English)" + } methods: { name: "GRU" full_name: "Gated Recurrent Unit" @@ -343,8 +345,8 @@ pr_id_to_video: { video: { video_id: "_Dp8u97_rQ0" video_title: "PR-003:Learning phrase representations using RNN encoder-decoder for statistical machine translation" - number_of_likes: 34 - number_of_views: 6361 + number_of_likes: 35 + number_of_views: 6413 published_date: { seconds: 1495764575 } @@ -369,66 +371,77 @@ pr_id_to_video: { authors: "Kaiming He" authors: "Xiaoou Tang" repositories: { - url: "https://github.com/aba450/Super-Resolution" - owner: "aba450" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/shreeyashyende/better_img_res_with_SRCNN" - owner: "shreeyashyende" + url: "https://github.com/nagadomi/waifu2x" + owner: "nagadomi" framework: FRAMEWORK_OTHERS - number_of_stars: 1 + number_of_stars: 21764 + description: "Image Super-Resolution for Anime-Style Art" } 
repositories: { - url: "https://github.com/mukul1093/Image-Super-Resolution" - owner: "mukul1093" - framework: FRAMEWORK_OTHERS + url: "https://github.com/Shritesh99/100DaysofMLCodeChallenge" + owner: "Shritesh99" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 23 + description: "A repository dedicated to the #100DaysofMLCode Challenge." } repositories: { - url: "https://github.com/jaivanti/Super-Resolution-using-ConvNet" - owner: "jaivanti" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Reconstructing a high resolution photo-realistic image from its counterpart low resolution image has been a long challenging task in the fraternity of computer vision. This task becomes even more difficult when all you have is a single low resolution image as input to recreate its high resolution image. This can be done using Convolution Neural Networks." + url: "https://github.com/r06922019/butt_lion_paper_notes" + owner: "r06922019" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "So many papers" } repositories: { - url: "https://github.com/Amritha16/ImageResolutionEnhancement" - owner: "Amritha16" - framework: FRAMEWORK_OTHERS - description: "A python implementation of https://arxiv.org/pdf/1501.00092.pdf" + url: "https://github.com/HighVoltageRocknRoll/sr" + owner: "HighVoltageRocknRoll" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 90 + description: "Image and video super resolution" } repositories: { - url: "https://github.com/amzamzamzamz/nagadomi-waifu2x" - owner: "amzamzamzamz" + url: "https://github.com/shreeyashyende/better_img_res_with_SRCNN" + owner: "shreeyashyende" framework: FRAMEWORK_OTHERS + number_of_stars: 1 } repositories: { - url: "https://github.com/vpaliwal1/Deep_learning_SRCNN" - owner: "vpaliwal1" + url: "https://github.com/fourseaforfriend/waifu2x" + owner: "fourseaforfriend" framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/xgd/waifu2xx" - owner: "xgd" - framework: FRAMEWORK_OTHERS + url: "https://github.com/jupiterman/Super-Resolution-Images" + owner: "jupiterman" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 } repositories: { - url: "https://github.com/Weifeng73/Zero-Shot-Super-resolution" - owner: "Weifeng73" + url: "https://github.com/WarrenGreen/srcnn" + owner: "WarrenGreen" framework: FRAMEWORK_OTHERS - description: "Computer Vision Course 2019 Final Project in ZJU " + number_of_stars: 27 + description: "Super Resolution for Satellite Imagery" } repositories: { - url: "https://github.com/ferseiti/reproducibility" - owner: "ferseiti" + url: "https://github.com/titu1994/Image-Super-Resolution" + owner: "titu1994" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 742 + description: "Implementation of Super Resolution CNN in Keras." + } + repositories: { + url: "https://github.com/atheesh1998/Image-Super-Resolution" + owner: "atheesh1998" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Image restoration from a low resolution input." 
} } video: { video_id: "1jGr_OFyfa0" video_title: "PR-004: Image Super-Resolution Using Deep Convolutional Networks" - number_of_likes: 63 - number_of_views: 9879 + number_of_likes: 62 + number_of_views: 9990 published_date: { seconds: 1492956744 } @@ -456,72 +469,72 @@ pr_id_to_video: { authors: "Daan Wierstra" authors: "Martin Riedmiller" repositories: { - url: "https://github.com/sourenaKhanzadeh/snakeAi" - owner: "sourenaKhanzadeh" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Reinforcement Learning with the classic snake game" + url: "https://github.com/tlohr/nfsu2-ai" + owner: "tlohr" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "This repository is used to create an AI that is able to drive races in the racing game Need for Speed: Underground 2" } repositories: { - url: "https://github.com/LukasGardberg/cartpole" - owner: "LukasGardberg" + url: "https://github.com/parilo/rl-server" + owner: "parilo" framework: FRAMEWORK_TENSORFLOW - description: "Inverse pendulum problem using Deep-Q learning with Keras and OpenAI's Gym" + number_of_stars: 13 + description: "Reinforcement Learning Server" } repositories: { - url: "https://github.com/datamllab/rlcard" - owner: "datamllab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1303 - description: "Reinforcement Learning / AI Bots in Card (Poker) Games - Blackjack, Leduc, Texas, DouDizhu, Mahjong, UNO." + url: "https://github.com/spragunr/deep_q_rl" + owner: "spragunr" + framework: FRAMEWORK_OTHERS + number_of_stars: 1048 + description: "Theano-based implementation of Deep Q-learning" } repositories: { - url: "https://github.com/TheFebrin/DeepRL-Pong" - owner: "TheFebrin" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Deep Reinforcement Learning bot playing Pong game." + url: "https://github.com/blakeMilner/DeepQLearning" + owner: "blakeMilner" + framework: FRAMEWORK_OTHERS + number_of_stars: 130 + description: "A powerful machine learning algorithm utilizing Q-Learning and Neural Networks, implemented using Torch and Lua." } repositories: { - url: "https://github.com/rikluost/RL_DQN_Pong" - owner: "rikluost" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" + url: "https://github.com/pavitrakumar78/Playing-custom-games-using-Deep-Learning" + owner: "pavitrakumar78" + framework: FRAMEWORK_OTHERS + number_of_stars: 25 + description: "Implementation of Google's paper on playing atari games using deep learning in python." } repositories: { - url: "https://github.com/gordicaleksa/pytorch-learn-reinforcement-learning" - owner: "gordicaleksa" - framework: FRAMEWORK_PYTORCH - number_of_stars: 73 - description: "A collection of various RL algorithms like policy gradients, DQN and PPO. The goal of this repo will be to make it a go-to resource for learning about RL. How to visualize, debug and solve RL problems. I've additionally included playground.py for learning more about OpenAI gym, etc." + url: "https://github.com/CankayaUniversity/ceng-407-408-License-Plate-Recognition-Using-Deep-Learning" + owner: "CankayaUniversity" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "License Plate Recognition Using Deep Learning" } repositories: { - url: "https://github.com/Curt-Park/rainbow-is-all-you-need" - owner: "Curt-Park" - framework: FRAMEWORK_OTHERS - number_of_stars: 1027 - description: "Rainbow is all you need! 
A step-by-step tutorial from DQN to Rainbow" + url: "https://github.com/sunjeet95/Deep-Q-Network-using-Tensorflow" + owner: "sunjeet95" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "This repository contains Deep Q-Networks and Double DQN implementation in tensorflow for Open AI Gym environments." } repositories: { - url: "https://github.com/epignatelli/human-level-control-through-deep-reinforcement-learning" - owner: "epignatelli" + url: "https://github.com/mfregeau/DeepLearning" + owner: "mfregeau" framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "A jax/stax implementation of: Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G. and Petersen, S., 2015. Human-level control through deep reinforcement learning. nature, 518(7540), pp.529-533." } repositories: { - url: "https://github.com/rishavb123/MineRL" - owner: "rishavb123" + url: "https://github.com/Linging/Traffic-Signal-Control" + owner: "Linging" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Applies the Deep Q Learning algorithm using a convolutional neural network to have an agent learn to fight zombies in a closed minecraft environment. This is done using Microsoft's Project Malmo (to create the environment) and tensorflow/keras to structure the network." + number_of_stars: 30 + description: "Intelligent traffic control on Vissim by dqn" } repositories: { - url: "https://github.com/eddynelson/dqn" - owner: "eddynelson" + url: "https://github.com/rikluost/RL_DQN_Pong" + owner: "rikluost" framework: FRAMEWORK_TENSORFLOW - description: "Deep Q-Networks Implementation with tensorflow 2.x" + number_of_stars: 1 + description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" } methods: { name: "Convolution" @@ -558,7 +571,7 @@ pr_id_to_video: { video_id: "V7_cNTfm2i8" video_title: "PR-005: Playing Atari with Deep Reinforcement Learning (NIPS 2013 Deep Learning Workshop)" number_of_likes: 53 - number_of_views: 8218 + number_of_views: 8252 published_date: { seconds: 1494165820 } @@ -581,36 +594,6 @@ pr_id_to_video: { authors: "Alex Graves" authors: "Greg Wayne" authors: "Ivo Danihelka" - repositories: { - url: "https://github.com/dgedon/lightning-ntm" - owner: "dgedon" - framework: FRAMEWORK_PYTORCH - description: "PyTorch Lightning implementation of Neural Turing Machine (NTM)." 
- } - repositories: { - url: "https://github.com/theneuralbeing/ntm" - owner: "theneuralbeing" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "A PyTorch Implementation of Neural Turing Machine" - } - repositories: { - url: "https://github.com/mdabagia/NeuralTuringMachine" - owner: "mdabagia" - framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of the neural Turing machine architecture" - } - repositories: { - url: "https://github.com/rs9000/Neural-Turing-machine" - owner: "rs9000" - framework: FRAMEWORK_PYTORCH - description: "NTM in PyTorch" - } - repositories: { - url: "https://github.com/shanyaanand/ntm" - owner: "shanyaanand" - framework: FRAMEWORK_OTHERS - } repositories: { url: "https://github.com/camigord/Neural-Turing-Machine" owner: "camigord" @@ -622,7 +605,7 @@ pr_id_to_video: { url: "https://github.com/loudinthecloud/pytorch-ntm" owner: "loudinthecloud" framework: FRAMEWORK_PYTORCH - number_of_stars: 467 + number_of_stars: 470 description: "Neural Turing Machines (NTM) - PyTorch Implementation" } repositories: { @@ -636,7 +619,7 @@ pr_id_to_video: { url: "https://github.com/MarkPKCollier/NeuralTuringMachine" owner: "MarkPKCollier" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 561 + number_of_stars: 563 description: "Tensorflow implementation of a Neural Turing Machine" } repositories: { @@ -646,6 +629,39 @@ pr_id_to_video: { number_of_stars: 266 description: "Neural Turing Machine (NTM) & Differentiable Neural Computer (DNC) with pytorch & visdom" } + repositories: { + url: "https://github.com/snowkylin/ntm" + owner: "snowkylin" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 171 + description: "TensorFlow implementation of Neural Turing Machines (NTM), with its application on one-shot learning (MANN)" + } + repositories: { + url: "https://github.com/jiankaiwang/dnc-py3" + owner: "jiankaiwang" + framework: FRAMEWORK_TENSORFLOW + description: "a tutorial for Differentiable Neural Computer (DNC) in python3" + } + repositories: { + url: "https://github.com/chiggum/Neural-Turing-Machines" + owner: "chiggum" + framework: FRAMEWORK_OTHERS + number_of_stars: 6 + description: "An attempt at replicating Deepmind's Neural Turing Machines in Theano" + } + repositories: { + url: "https://github.com/philippe554/MANN" + owner: "philippe554" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 12 + description: "Neural Turing Machine" + } + repositories: { + url: "https://github.com/ajithcodesit/lstm_copy_task" + owner: "ajithcodesit" + framework: FRAMEWORK_TENSORFLOW + description: "LSTM copy task in which a pattern is stored in memory and reproduced again" + } methods: { name: "Content-based Attention" full_name: "Content-based Attention" @@ -676,7 +692,7 @@ pr_id_to_video: { video_id: "2wbDiZCWQtY" video_title: "PR-006: Neural Turing Machine" number_of_likes: 41 - number_of_views: 5094 + number_of_views: 5113 published_date: { seconds: 1494447474 } @@ -701,73 +717,75 @@ pr_id_to_video: { authors: "Eli Shechtman" authors: "Kavita Bala" repositories: { - url: "https://github.com/YooJiHyeong/SinIR" - owner: "YooJiHyeong" - framework: FRAMEWORK_PYTORCH - number_of_stars: 32 - description: "Official implementation of \"SinIR: Efficient General Image Manipulation with Single Image Reconstruction\" (ICML 2021)" - } - repositories: { - url: "https://github.com/EvanLi/Github-Ranking" + url: "https://github.com/EvanLi/github-most-stars-forks" owner: "EvanLi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 488 + number_of_stars: 553 description: 
":star:Github Ranking:star: Github stars and forks ranking list. Github Top100 stars list of different languages. Automatically update daily. | Github仓库排名,每日自动更新" } repositories: { - url: "https://github.com/EvanLi/github-most-stars-forks" + url: "https://github.com/EvanLi/Github-Ranking" owner: "EvanLi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 488 + number_of_stars: 553 description: ":star:Github Ranking:star: Github stars and forks ranking list. Github Top100 stars list of different languages. Automatically update daily. | Github仓库排名,每日自动更新" } repositories: { - url: "https://github.com/LouieYang/deep-photo-styletransfer-tf" - owner: "LouieYang" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 775 - description: "Tensorflow (Python API) implementation of Deep Photo Style Transfer" + url: "https://github.com/leolle/StyleTransfer" + owner: "leolle" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } repositories: { - url: "https://github.com/fatihky/starred" - owner: "fatihky" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 + url: "https://github.com/IdanAzuri/pytorch_fewshot_with_deep_photo" + owner: "IdanAzuri" + framework: FRAMEWORK_PYTORCH + description: "Experiments with few shot learning and fast photo transfer" } repositories: { - url: "https://github.com/alexanderivanov2424/CSCI-1430-Final-Project" - owner: "alexanderivanov2424" + url: "https://github.com/dedekinds/The-Color-Transfer-of-Animes-Characters-Images" + owner: "dedekinds" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + number_of_stars: 14 + description: "The final project of Advance Machine Learning course in Tsinghua University. This project aims to make a color transfer of animes characters' images and improve the images' quality after color transferring in low quality pictures. " } repositories: { - url: "https://github.com/ucsd-dsc-arts/dsc160-final-dsc160_final_group4" - owner: "ucsd-dsc-arts" - framework: FRAMEWORK_TENSORFLOW - description: "dsc160-final-dsc160_final_group4 created by GitHub Classroom" + url: "https://github.com/mandarin4452/StyleTune" + owner: "mandarin4452" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/ritesh2212/DeepPhotoStyle_pytorch-master" - owner: "ritesh2212" - framework: FRAMEWORK_PYTORCH + is_official: true + url: "https://github.com/luanfujun/deep-photo-styletransfer" + owner: "luanfujun" + framework: FRAMEWORK_OTHERS + number_of_stars: 9801 + description: "Code and data for paper \"Deep Photo Style Transfer\": https://arxiv.org/abs/1703.07511 " } repositories: { - url: "https://github.com/johnsun03/myTest" - owner: "johnsun03" + url: "https://github.com/xuberance137/styleflow" + owner: "xuberance137" + framework: FRAMEWORK_TENSORFLOW + description: "Algorithmic explorations with stylistic image transfer" + } + repositories: { + url: "https://github.com/purushothamgowthu/deep-photo-styletransfer" + owner: "purushothamgowthu" framework: FRAMEWORK_OTHERS - description: "one test" } repositories: { - url: "https://github.com/muriloime/awesome-stars" - owner: "muriloime" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/clovaai/WCT2" + owner: "clovaai" + framework: FRAMEWORK_PYTORCH + number_of_stars: 659 + description: "Software that can perform photorealistic style transfer without the need of any post-processing steps." 
} } video: { video_id: "YF6nLVDlznE" video_title: "PR-007: Deep Photo Style Transfer" - number_of_likes: 29 - number_of_views: 5745 + number_of_likes: 30 + number_of_views: 5768 published_date: { seconds: 1494826006 } @@ -814,77 +832,76 @@ pr_id_to_video: { authors: "Oriol Vinyals" authors: "Jeff Dean" repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/distillation" - owner: "labml_nn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + url: "https://github.com/kaung-htet-myat/Multi-teachers-Knowledge-Distillation" + owner: "kaung-htet-myat" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Distilling knowledge from ensemble of multiple teacher networks to student network with multiple heads" } repositories: { - url: "https://github.com/JunzWu/Distilling-the-Knowledge-in-a-Neural-Network" - owner: "JunzWu" + url: "https://github.com/altndrr/persona" + owner: "altndrr" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 1 + description: "[WIP] Research project on face recognition" } repositories: { - url: "https://github.com/jaychoi12/LG_KD" - owner: "jaychoi12" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "LG paper review QnA session - Knowledge Distillation" + url: "https://github.com/MinJunKang/ImageNet-Knowledge-Distillation" + owner: "MinJunKang" + framework: FRAMEWORK_TENSORFLOW + description: "Knowledge Distillation code with ImageNet" } repositories: { - url: "https://github.com/yoshitomo-matsubara/torchdistill" - owner: "yoshitomo-matsubara" + url: "https://github.com/MasLiang/Learning-without-Forgetting-using-Pytorch" + owner: "MasLiang" framework: FRAMEWORK_PYTORCH - number_of_stars: 330 - description: "PyTorch-based modular, configuration-driven framework for knowledge distillation. 🏆18 methods presented at CVPR, ICLR, ECCV, NeurIPS, ICCV, etc are implemented so far. 🎁 Trained models, training logs and configurations are available for ensuring the reproducibiliy and benchmark." - } - repositories: { - url: "https://github.com/franknb/Text-Summarization" - owner: "franknb" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "An experimental repo for testing effective text summarization tools." 
- } - repositories: { - url: "https://github.com/TakieddineSOUALHI/Transfer_learning" - owner: "TakieddineSOUALHI" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 + number_of_stars: 10 + description: "This is the Pytorch implementation of LwF" } repositories: { - url: "https://github.com/millenialSpirou/ift6010" - owner: "millenialSpirou" + url: "https://github.com/MinJunKang/Knowledge-Distillation_MNIST" + owner: "MinJunKang" framework: FRAMEWORK_TENSORFLOW + description: "Single Teacher to Single Student model knowledge distillation using DNN Structure" } repositories: { - url: "https://github.com/MasLiang/Learning-without-Forgetting-using-Pytorch" - owner: "MasLiang" + url: "https://github.com/wonbeomjang/Knowledge-Distilling-PyTorch" + owner: "wonbeomjang" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "This is the Pytorch implementation of LwF" + number_of_stars: 3 + description: "Implementation of Distilling the Knowledge in a Neural Network https://arxiv.org/pdf/1503.02531.pdf" } repositories: { - url: "https://github.com/KaiyuYue/mgd" - owner: "KaiyuYue" + url: "https://github.com/jaychoi12/LG_KD" + owner: "jaychoi12" framework: FRAMEWORK_PYTORCH - number_of_stars: 37 - description: "Matching Guided Distillation (ECCV 2020)" + number_of_stars: 2 + description: "LG paper review QnA session - Knowledge Distillation" } repositories: { - url: "https://github.com/see--/speech_recognition" - owner: "see--" + url: "https://github.com/mckunkel/DistillingObjectDetector" + owner: "mckunkel" + framework: FRAMEWORK_OTHERS + description: "Obsolete " + } + repositories: { + url: "https://github.com/MinJunKang/CIFAR-100--Knowledge-Distillation-with-Augmented-Data" + owner: "MinJunKang" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 20 - description: "This repo contains my part of the code for our winning entry in the TensorFlow Speech Recognition Challenge hosted by kaggle" + number_of_stars: 2 + } + repositories: { + url: "https://github.com/yangze01/Distilling_the_Knowledge_in_a_Neural_Network_pytorch" + owner: "yangze01" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 } } video: { video_id: "tOItokBZSfU" video_title: "PR-009: Distilling the Knowledge in a Neural Network (Slide: English, Speaking: Korean)" - number_of_likes: 44 - number_of_views: 6498 + number_of_likes: 46 + number_of_views: 6578 published_date: { seconds: 1495514577 } @@ -907,68 +924,69 @@ pr_id_to_video: { authors: "Diederik P Kingma" authors: "Max Welling" repositories: { - url: "https://gitlab.com/bpaassen/ast2vec" - owner: "bpaassen" + url: "https://github.com/harish678/PyTorch-Lightning-Examples/blob/master/vae_lightning.py" + owner: "master" framework: FRAMEWORK_PYTORCH - description: "The ast2vec neural network to translate Python syntax trees to vectors and back." 
+ number_of_stars: 1 + description: "Models implemented in PyTorch Lightning" } repositories: { - url: "https://github.com/ngiann/ApproximateVI.jl" - owner: "ngiann" - framework: FRAMEWORK_OTHERS - description: "Approximate variational inference in Julia" + url: "https://github.com/GuHongyang/VAEs" + owner: "GuHongyang" + framework: FRAMEWORK_PYTORCH + description: "Variant models of VAE based on pytorch" } repositories: { - url: "https://github.com/nghorbani/human_body_prior" - owner: "nghorbani" - framework: FRAMEWORK_PYTORCH - number_of_stars: 304 - description: "VPoser: Variational Human Pose Prior" + url: "https://github.com/enalisnick/stick-breaking_dgms" + owner: "enalisnick" + framework: FRAMEWORK_OTHERS + number_of_stars: 83 + description: "Deep Generative Models with Stick-Breaking Priors" } repositories: { - url: "https://github.com/lanzhang128/disentanglement" - owner: "lanzhang128" + url: "https://github.com/y0ast/VAE-TensorFlow" + owner: "y0ast" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 + number_of_stars: 196 + description: "Implementation of a Variational Auto-Encoder in TensorFlow" } repositories: { - url: "https://github.com/carbonati/variational-zoo" - owner: "carbonati" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 15 - description: "Variational inference and disentangled representations through unsupervised learning" + url: "https://github.com/taohu88/BayesianML" + owner: "taohu88" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/tonystevenj/vae-celeba-pytorch-lightning" - owner: "tonystevenj" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Valinna VAE implemented in pytorch-lightning, trained through Celeba dataset" + url: "https://github.com/adityabingi/Beta-VAE" + owner: "adityabingi" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "Tensorflow implementation of Beta-Variational-AutoEncoder for CelebA dataset" } repositories: { - url: "https://github.com/leokster/CVAE" - owner: "leokster" + url: "https://github.com/AlexanderBogatko/Keras-CCVAE" + owner: "AlexanderBogatko" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Keras implementation of conditional convolutional VAE." } repositories: { - url: "https://github.com/selimseker/logogram-language-generator" - owner: "selimseker" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + url: "https://github.com/yjucho1/articles" + owner: "yjucho1" + framework: FRAMEWORK_OTHERS + number_of_stars: 82 + description: "Papers I read" } repositories: { - url: "https://github.com/shinshoji01/Style-Restricted_GAN" - owner: "shinshoji01" + url: "https://github.com/kuc2477/pytorch-vae" + owner: "kuc2477" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "This repository is to introduce our model, Style-Restricted GAN." + number_of_stars: 28 + description: "PyTorch implementation of \"Auto-Encoding Variational Bayes\", arxiv:1312.6114" } repositories: { - url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/vae/vae.py" - owner: "vae" - framework: FRAMEWORK_PYTORCH - number_of_stars: 156 - description: "Easy generative modeling in PyTorch." 
+ url: "https://github.com/DylanSpicker/judging-covers" + owner: "DylanSpicker" + framework: FRAMEWORK_OTHERS } methods: { name: "VAE" @@ -983,8 +1001,8 @@ pr_id_to_video: { video: { video_id: "KYA-GEhObIs" video_title: "PR-010: Auto-Encoding Variational Bayes, ICLR 2014" - number_of_likes: 205 - number_of_views: 12220 + number_of_likes: 206 + number_of_views: 12348 published_date: { seconds: 1495549847 } @@ -1009,70 +1027,69 @@ pr_id_to_video: { authors: "Andrew Zisserman" authors: "Koray Kavukcuoglu" repositories: { - url: "https://github.com/dabane-ghassan/int-lab-book" - owner: "dabane-ghassan" + url: "https://github.com/renato145/stn" + owner: "renato145" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "\"Foveated Spatial Transformers\", benchmarking Spatial Transformer Networks against a bio-inspired artificial vision model." + description: "Spatial Transformer Networks implementation using fastai+pytorch" } repositories: { - url: "https://github.com/vinod377/STN-OCR" - owner: "vinod377" + url: "https://github.com/ppriyank/Deep-neural-network-for-traffic-sign-recognition-systems" + owner: "ppriyank" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Pytorch Implementation of Deep neural network for traffic sign recognition systems: An analysis of spatial transformers and stochastic optimisation methods" + } + repositories: { + url: "https://github.com/Sooram/stn" + owner: "Sooram" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Implementation of \"STN-OCR: A single Neural Network for Text Detection and Text Recognition\" in natural Scenes by Christian Bartz." + description: "Implementation of Spatial Transformer Networks in Tensorflow" } repositories: { - url: "https://github.com/sayakpaul/Spatial-Transformer-Networks-with-Keras" - owner: "sayakpaul" + url: "https://github.com/moodstocks/gtsrb.torch" + owner: "moodstocks" framework: FRAMEWORK_OTHERS - number_of_stars: 16 - description: "This repository provides a Colab Notebook that shows how to use Spatial Transformer Networks inside CNNs build in Keras." + number_of_stars: 165 + description: "Traffic sign recognition with Torch" } repositories: { - url: "https://github.com/TencentYoutuResearch/SelfSupervisedLearning-DSM" - owner: "TencentYoutuResearch" + url: "https://github.com/darr/spatial_transformer_networks" + owner: "darr" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "code for AAAI21 paper \"Enhancing Unsupervised Video Representation Learning by Decoupling the Scene and the Motion“" - } - repositories: { - url: "https://github.com/dedhiaparth98/spatial-transformer-network" - owner: "dedhiaparth98" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Spatial Transformer Network (STN) provides attention to a particular region to in an image, by doing transformation to the input image. The code in this repository does Affine transformation to image, but other transformation can be explored." 
+ description: "implement spatial transformer networks with mnist" } repositories: { - url: "https://github.com/chenwuperth/rgz_rcnn" - owner: "chenwuperth" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 36 - description: "ClaRAN: A deep learning classifier for radio morphologies" + url: "https://github.com/shubhtuls/stn3d" + owner: "shubhtuls" + framework: FRAMEWORK_OTHERS + number_of_stars: 11 + description: "3D Spatial Transformer Network" } repositories: { - url: "https://github.com/FingerRec/DSM" - owner: "FingerRec" - framework: FRAMEWORK_OTHERS - number_of_stars: 44 - description: "[AAAI2021] The source code for our paper 《Enhancing Unsupervised Video Representation Learning by Decoupling the Scene and the Motion》." + url: "https://github.com/eonr/CompVision" + owner: "eonr" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Some of my computer vision notebooks" } repositories: { - url: "https://github.com/tianyu-tristan/Visual-Attention-Model" - owner: "tianyu-tristan" + url: "https://github.com/mimikaan/Attention-Model" + owner: "mimikaan" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 } repositories: { - url: "https://github.com/elisiojsj/Kuzushiji-49" - owner: "elisiojsj" - framework: FRAMEWORK_PYTORCH - description: "Classifier for Kuzushiji (Japanese calligraphy) characters." + url: "https://github.com/qassemoquab/stnbhwd" + owner: "qassemoquab" + framework: FRAMEWORK_OTHERS + number_of_stars: 367 + description: "Modules for spatial transformer networks (BHWD layout)" } repositories: { - url: "https://github.com/Mugilvanan/stnbhwd" - owner: "Mugilvanan" - framework: FRAMEWORK_OTHERS + url: "https://github.com/IBM/MAX-Spatial-Transformer-Network" + owner: "IBM" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Train a neural network component that can add spatial transformations such as translation and rotation to larger models." 
} methods: { name: "Spatial Transformer" @@ -1093,8 +1110,8 @@ pr_id_to_video: { video: { video_id: "Rv3osRZWGbg" video_title: "PR-011: Spatial Transformer Networks" - number_of_likes: 46 - number_of_views: 5482 + number_of_likes: 48 + number_of_views: 5571 published_date: { seconds: 1495978512 } @@ -1119,71 +1136,68 @@ pr_id_to_video: { authors: "Ross Girshick" authors: "Jian Sun" repositories: { - url: "https://github.com/boom85423/Seq2seq-model-for-Meme-Generator" - owner: "boom85423" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 + url: "https://github.com/wayne1204/NOAA-fish-finding" + owner: "wayne1204" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Intern project at Institude of Information Science, Academia Sinica " } repositories: { - url: "https://github.com/aleksispi/drl-rpn-tf" - owner: "aleksispi" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 68 - description: "Official Tensorflow implementation of drl-RPN: Deep Reinforcement Learning of Region Proposal Networks (CVPR 2018 paper)" + url: "https://github.com/vincentzhang/faster-rcnn-fcn" + owner: "vincentzhang" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "ROI FCN, End-to-end Segmentation on Regional Proposals" } repositories: { - url: "https://github.com/miaohua1982/simple_fasterrcnn_pytorch" - owner: "miaohua1982" + url: "https://github.com/facebookresearch/detectron" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 24562 + description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." } repositories: { - url: "https://github.com/JeffCHEN2017/WSSTG" - owner: "JeffCHEN2017" - framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "This repository contains the main baselines introduced in WSSTG (ACL 2019)." + url: "https://github.com/ithuanhuan/py-fatser-rcnn" + owner: "ithuanhuan" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/VDIGPKU/OPANAS" - owner: "VDIGPKU" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - description: "The official code for OPANAS: One-Shot Path Aggregation Network Architecture Search for Object Detection" + url: "https://github.com/noelcodes/cloth-recognition-version2" + owner: "noelcodes" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "cloth recognition version 2: This is my 2nd attempt on this topic." } repositories: { - url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" - owner: "KostadinovShalon" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + url: "https://github.com/bareblackfoot/lddp-tf-faster-rcnn" + owner: "bareblackfoot" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Tensorflow implementation of LDDP" } repositories: { - url: "https://github.com/potterhsu/easy-faster-rcnn.pytorch" - owner: "potterhsu" - framework: FRAMEWORK_PYTORCH - number_of_stars: 150 - description: "An easy implementation of Faster R-CNN (https://arxiv.org/pdf/1506.01497.pdf) in PyTorch." 
+ url: "https://github.com/guanfuchen/py-faster-rcnn" + owner: "guanfuchen" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "py-faster-rcnn源码阅读笔记" } repositories: { - url: "https://github.com/zhudelong/elevator_button_recognition" - owner: "zhudelong" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 - description: "Button recognition for autonomous elevator operation" + url: "https://github.com/godspeedcurry/lung-nodule-detection" + owner: "godspeedcurry" + framework: FRAMEWORK_OTHERS + description: "My srtp project" } repositories: { - url: "https://github.com/EmGarr/kerod" - owner: "EmGarr" + url: "https://github.com/nautilus261/tf-faster-rcnn" + owner: "nautilus261" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 36 - description: "DETR - Faster RCNN implementation in tensorflow 2" } repositories: { - url: "https://github.com/liangheming/faster_rcnnv1" - owner: "liangheming" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "pytorch implement of fasterRCNN,736px(max side),39.4mAP(COCO),30.21fps(RTX 2080TI)" + url: "https://github.com/yzgrfsy/tf-fastrcnn-crop" + owner: "yzgrfsy" + framework: FRAMEWORK_TENSORFLOW + description: "output tensorboard in https://pan.baidu.com/s/1PDd7wY7XZzlrCGB-56jsEA" } methods: { name: "RPN" @@ -1223,8 +1237,8 @@ pr_id_to_video: { video: { video_id: "kcPAGIgBGRs" video_title: "PR-012: Faster R-CNN : Towards Real-Time Object Detection with Region Proposal Networks" - number_of_likes: 392 - number_of_views: 48992 + number_of_likes: 400 + number_of_views: 49400 published_date: { seconds: 1495981094 } @@ -1253,52 +1267,16 @@ pr_id_to_video: { authors: "Mario Marchand" authors: "Victor Lempitsky" repositories: { - url: "https://github.com/criteo-research/pytorch-ada" - owner: "criteo-research" - framework: FRAMEWORK_PYTORCH - number_of_stars: 55 - description: "Another Domain Adaptation library, aimed at researchers." 
- } - repositories: { - url: "https://github.com/rpryzant/proxy-a-distance" - owner: "rpryzant" - framework: FRAMEWORK_OTHERS - number_of_stars: 30 - description: "Proxy A-Distance algorithm for measuring domain disparity in parallel corpora" - } - repositories: { - url: "https://github.com/JorisRoels/domain-adaptive-segmentation" - owner: "JorisRoels" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - description: "Domain adaptation segmentation for volume EM imaging" - } - repositories: { - url: "https://github.com/facebookresearch/DomainBed" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 363 - description: "DomainBed is a suite to test domain generalization algorithms" - } - repositories: { - url: "https://github.com/monkey0head/Domain_Adaptation_thesis" - owner: "monkey0head" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/AnonymousAccount0/WANN" + owner: "AnonymousAccount0" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 - description: "Source code for master thesis on Unsupervised Domain Adaptation for Image Processing " } repositories: { - url: "https://github.com/dv-fenix/Domain-Adaptation" - owner: "dv-fenix" + url: "https://github.com/MarvinMartin24/MADA-PL" + owner: "MarvinMartin24" framework: FRAMEWORK_PYTORCH - description: "PyTorch implementations of some papers on Domain Adaptation" - } - repositories: { - url: "https://github.com/Nadavc220/DomainAdversarialTrainingOfNeuralNetworks" - owner: "Nadavc220" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "This is a Pytorch implementation of the 2014 paper named Domain Adversarial Training of Neural Networks " + description: "Multi-Adversarial Domain Adaptation (https://arxiv.org/abs/1809.02176) implementation in Pytorch-Lightning" } repositories: { url: "https://github.com/asahi417/DeepDomainAdaptation" @@ -1321,12 +1299,47 @@ pr_id_to_video: { number_of_stars: 26 description: "A Universal Music Translation Network Implementation" } + repositories: { + url: "https://github.com/domainadaptation/salad" + owner: "domainadaptation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 281 + description: "A toolbox for domain adaptation and semi-supervised learning. Contributions welcome." + } + repositories: { + url: "https://github.com/monkey0head/Domain_Adaptation_thesis" + owner: "monkey0head" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Source code for master thesis on Unsupervised Domain Adaptation for Image Processing " + } + repositories: { + url: "https://github.com/ShichengChen/Domain-Adversarial-Training-of-Neural-Networks" + owner: "ShichengChen" + framework: FRAMEWORK_PYTORCH + number_of_stars: 44 + description: "implement Domain-Adversarial Training of Neural Networks" + } + repositories: { + url: "https://github.com/vihari/crossgrad" + owner: "vihari" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 20 + description: "This repository contains implementation of CROSSGRAD (https://openreview.net/forum?id=r1Dx7fbCW) and DAN (https://arxiv.org/abs/1505.07818)." + } + repositories: { + url: "https://github.com/erlendd/ddan" + owner: "erlendd" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 41 + description: "Deep domain adaptation networks (DDAN) library for Python with TensorFlow." 
+ } } video: { video_id: "n2J7giHrS-Y" video_title: "PR-013: Domain Adversarial Training of Neural Network" number_of_likes: 52 - number_of_views: 5921 + number_of_views: 5998 published_date: { seconds: 1496675287 } @@ -1349,21 +1362,11 @@ pr_id_to_video: { authors: "Julieta Martinez" authors: "Michael J. Black" authors: "Javier Romero" - repositories: { - url: "https://github.com/nageshpindi/human-motion-prediction-master" - owner: "nageshpindi" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/YQRickWang/tf" - owner: "YQRickWang" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/facebookresearch/QuaterNet" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 605 + number_of_stars: 612 description: "Proposes neural networks that can generate animation of virtual characters for different actions." } repositories: { @@ -1371,7 +1374,7 @@ pr_id_to_video: { url: "https://github.com/una-dinosauria/human-motion-prediction" owner: "una-dinosauria" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 303 + number_of_stars: 307 description: "Simple baselines and RNNs for predicting human motion in tensorflow. Presented at CVPR 17." } repositories: { @@ -1387,12 +1390,22 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH description: "Motion Prediciton on deepfly" } + repositories: { + url: "https://github.com/YQRickWang/tf" + owner: "YQRickWang" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/nageshpindi/human-motion-prediction-master" + owner: "nageshpindi" + framework: FRAMEWORK_TENSORFLOW + } } video: { video_id: "Y1loN3Sc4Dk" video_title: "PR-014: On Human Motion Prediction using RNNs (2017)" - number_of_likes: 53 - number_of_views: 5128 + number_of_likes: 55 + number_of_views: 5152 published_date: { seconds: 1496611967 } @@ -1414,78 +1427,76 @@ pr_id_to_video: { } authors: "Yoon Kim" repositories: { - url: "https://github.com/xiaoxinyu1997/Knowledge-Graph" - owner: "xiaoxinyu1997" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "天池比赛" - } - repositories: { - url: "https://github.com/zhouyixin829/Analysis-on-Stop-Asian-Hate-trend-from-Twitter" - owner: "zhouyixin829" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "CIS600 team project" - } - repositories: { - url: "https://github.com/chiemenz/AzureML-Sentiment-Classification-and-Model-Deployment" - owner: "chiemenz" + url: "https://github.com/DataZwer/CNNTextClassification" + owner: "DataZwer" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + description: "第一个调通、看懂了的TensorFlow代码" } repositories: { - url: "https://github.com/GayeonKim-data/section4-project" - owner: "GayeonKim-data" - framework: FRAMEWORK_OTHERS - description: "딥러닝을 활용한 영화 리뷰 속 스포일러 탐지 프로젝트" + url: "https://github.com/afrozloya/charrec" + owner: "afrozloya" + framework: FRAMEWORK_TENSORFLOW } repositories: { url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/contentunderstanding/textcnn" owner: "contentunderstanding" framework: FRAMEWORK_OTHERS - number_of_stars: 556 + number_of_stars: 594 description: "大规模推荐模型训练工具" } repositories: { - url: "https://github.com/guanliu321/CNN-RNN-HAN-for-Text-Classification-Using-NLP" - owner: "guanliu321" + url: "https://github.com/SeonbeomKim/TensorFlow-TextCNN" + owner: "SeonbeomKim" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "TextCNN (Convolutional Neural Networks for Sentence Classification - Yoon Kim)" + } + repositories: { + url: 
"https://github.com/shagunsodhani/CNN-Sentence-Classifier" + owner: "shagunsodhani" framework: FRAMEWORK_OTHERS + number_of_stars: 138 + description: "Implementation of \"Convolutional Neural Networks for Sentence Classification\" paper" + } + repositories: { + url: "https://github.com/avinashsai/Convolutional-Neural-Networks-for-Text-Classification" + owner: "avinashsai" + framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "It’s a NLP Problem,the goal of our project is to classify categories of news based on the content of news articles from the BBC website using CNN, RNN and HAN models on two datasets that the former dataset have 2225 news, 5 categories and the latter dataset have 18846 news, 20 categories. Set hyperparameters, such as embedding dimensions of glove model, trainable parameter of embedding layer, bidirectional LSTM or simple LSTM Preprocess the news articles, including removing punctuation ,stopwords, lemmatization,removing outliers in terms of news length and the number of sentences and set the corresponding parameters Tokenize the data using word-index which is fit on the train data,then generate 2D input data (article, word) for CNN and RNN algorithms,and then generate 3D input data (article, sentence, word) for HAN algorithm Use set hyperparameters to build the model architecture and use checkpointing, early stopping to train model, and then compare the test accuracy and validation loss of these three models Utilized:Python,Pandas,Numpy,Seaborn,Matplolib,NLP,DNN,CNN,RNN,HAN,LSTM,GPU,Text Classification,Hyperparameters Tuning" + description: "Implementation of CNN for Text Classification" } repositories: { - url: "https://github.com/dongjun-Lee/text-classification-models-tf" - owner: "dongjun-Lee" + url: "https://github.com/Johnnylyu/Text-Mining-and-Classification-for-Attain" + owner: "Johnnylyu" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 486 - description: "Tensorflow implementations of Text Classification Models." 
+ number_of_stars: 2 } repositories: { - url: "https://github.com/yinghao1019/NLP_and_DL_practice/blob/master/Convolution_Neural_Netowrks_for_sentence_classification_Practice.ipynb" - owner: "master" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "This repository is used for NLP Model practice and learning" + url: "https://github.com/JiachuanDENG/Pytorch-CNN-Textual-RegressionModel-for-commodity-price-prediction" + owner: "JiachuanDENG" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "CNN Textual RegressionModel for commodity price prediction" } repositories: { - url: "https://github.com/yinghao1019/NLP_and_DL_practice" - owner: "yinghao1019" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "This repository is used for NLP Model practice and learning" + url: "https://github.com/machine-learning-study-group/movie-sentiment-analysis" + owner: "machine-learning-study-group" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Sentiment analysis of movie (IMDB) reviews using dataset provided by the ACL 2011 paper" } repositories: { - url: "https://github.com/chiemenz/automl_vs_hyperdrive" - owner: "chiemenz" + url: "https://github.com/lrank/Robust-Representation" + owner: "lrank" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + number_of_stars: 8 } } video: { video_id: "IRB2vXSet2E" video_title: "PR-015:Convolutional Neural Networks for Sentence Classification" number_of_likes: 49 - number_of_views: 5810 + number_of_views: 5874 published_date: { seconds: 1497187460 } @@ -1510,122 +1521,126 @@ pr_id_to_video: { authors: "Ross Girshick" authors: "Ali Farhadi" repositories: { - url: "https://github.com/westerndigitalcorporation/YOLOv3-in-PyTorch" - owner: "westerndigitalcorporation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 84 - description: "YOLOv3 in PyTorch with training and inference module implemented." 
- } - repositories: { - url: "https://github.com/AlexeyAB/darknet" - owner: "AlexeyAB" + url: "https://github.com/eric-erki/android-yolo" + owner: "eric-erki" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16682 - description: "YOLOv4 / Scaled-YOLOv4 / YOLO - Neural Networks for Object Detection (Windows and Linux version of Darknet )" + number_of_stars: 2 + description: "Real-time object detection on Android using the YOLO network with TensorFlow" } repositories: { - url: "https://github.com/DevBruce/YOLOv1-TF2" - owner: "DevBruce" + url: "https://github.com/WaelOuni/MergeTenserFlowWithOdb" + owner: "WaelOuni" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "YOLOv1 implementation with TensorFlow2" } repositories: { - url: "https://github.com/msuhail1997/YOLO-Pytorch-Object_Detection" - owner: "msuhail1997" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/manankshastri/Object-Detection" + owner: "manankshastri" + framework: FRAMEWORK_TENSORFLOW + description: "Object Detection on a car detection dataset using YOLO" } repositories: { - url: "https://github.com/jalotra/Queue-Detection" - owner: "jalotra" - framework: FRAMEWORK_PYTORCH - description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue{q}, how many people are standing in queue{Q}.\"" + url: "https://github.com/natanielruiz/android-yolo" + owner: "natanielruiz" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 607 + description: "Real-time object detection on Android using the YOLO network with TensorFlow" } repositories: { - url: "https://github.com/jalotra/Queue-Detection-" - owner: "jalotra" - framework: FRAMEWORK_PYTORCH - description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue{q}, how many people are standing in queue{Q}.\"" + url: "https://github.com/Ereebay/Deep-Learning-Documents" + owner: "Ereebay" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Documents for DAI group" } repositories: { - url: "https://github.com/TeamML-2021/knowledge-base" - owner: "TeamML-2021" - framework: FRAMEWORK_OTHERS + url: "https://github.com/noelcodes/YOLO" + owner: "noelcodes" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Exploring YOLO. Credits to Mark Jay and darkflow." } repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/object_detection/you_only_look_once" - owner: "object_detection" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RobbertBrand/Yolo-Tensorflow-Implementation" + owner: "RobbertBrand" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 2 - description: "ai codebase" + description: "A Yolo object detection implementation in Tensorflow, trainable using Tensorflow optimizers like ADAM. 
" } repositories: { - url: "https://github.com/hamidriasat/Computer-Vision-and-Deep-Learning" - owner: "hamidriasat" + url: "https://github.com/Stick-To/YOLO-TF" + owner: "Stick-To" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11 + description: "YOLOv2 YOLOv3 in pure tensorflow" } repositories: { - url: "https://github.com/ritesh2448/Text-Detection-And-Recognition" - owner: "ritesh2448" + url: "https://github.com/keshav47/Face-Recognition-And-Verification" + owner: "keshav47" framework: FRAMEWORK_TENSORFLOW + description: "We Recognize face using One Shot Learning and Face Verification is done using Triplet loss function on a pre-trained Inception Model " } - methods: { - name: "Non Maximum Suppression" - full_name: "Non Maximum Suppression" - description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + repositories: { + url: "https://github.com/sprenkle/VectorCards" + owner: "sprenkle" + framework: FRAMEWORK_OTHERS + number_of_stars: 10 } methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + name: "RoIPool" + full_name: "RoIPool" + description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" } methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. 
It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + name: "Step Decay" + full_name: "Step Decay" + description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" } methods: { name: "1x1 Convolution" full_name: "1x1 Convolution" description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } methods: { name: "Dropout" full_name: "Dropout" description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } methods: { - name: "Step Decay" - full_name: "Step Decay" - description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + name: "YOLOv1" + full_name: "YOLOv1" + description: "**YOLOv1** is a single-stage object detection model. Object detection is framed as a regression problem to spatially separated bounding boxes and associated class probabilities. A single neural network predicts bounding boxes and class probabilities directly from full images in one evaluation. Since the whole detection pipeline is a single network, it can be optimized end-to-end directly on detection performance. \r\n\r\nThe network uses features from the entire image to predict each bounding box. It also predicts all bounding boxes across all classes for an image simultaneously. 
This means the network reasons globally about the full image and all the objects in the image."
   }
   methods: {
-    name: "Random Resized Crop"
-    full_name: "Random Resized Crop"
-    description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)"
+    name: "Leaky ReLU"
+    full_name: "Leaky ReLU"
+    description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks."
   }
   methods: {
-    name: "SGD with Momentum"
-    full_name: "SGD with Momentum"
-    description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)"
+    name: "VGG"
+    full_name: "VGG"
+    description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)"
+  }
+  methods: {
+    name: "Convolution"
+    full_name: "Convolution"
+    description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)"
+  }
+  methods: {
+    name: "Dense Connections"
+    full_name: "Dense Connections"
+    description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Non Maximum Suppression" + full_name: "Non Maximum Suppression" + description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" } } video: { video_id: "eTDcoeqj1_w" video_title: "PR-016: You only look once: Unified, real-time object detection" - number_of_likes: 100 - number_of_views: 16290 + number_of_likes: 102 + number_of_views: 16494 published_date: { seconds: 1497795435 } @@ -1647,21 +1662,6 @@ pr_id_to_video: { } authors: "Barret Zoph" authors: "Quoc V. Le" - repositories: { - url: "https://github.com/abcp4/DAPytorch" - owner: "abcp4" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/YaCpotato/deepaugmentFix" - owner: "YaCpotato" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/TreeLimes/QANAS" - owner: "TreeLimes" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/cshannonn/blackscholes_nas" owner: "cshannonn" @@ -1685,7 +1685,7 @@ pr_id_to_video: { url: "https://github.com/carpedm20/ENAS-pytorch" owner: "carpedm20" framework: FRAMEWORK_PYTORCH - number_of_stars: 2453 + number_of_stars: 2459 description: "PyTorch implementation of \"Efficient Neural Architecture Search via Parameters Sharing\"" } repositories: { @@ -1693,7 +1693,7 @@ pr_id_to_video: { url: "https://github.com/tensorflow/models" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70579 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { @@ -1707,15 +1707,30 @@ pr_id_to_video: { url: "https://github.com/DataCanvasIO/Hypernets" owner: "DataCanvasIO" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 757 + number_of_stars: 1018 description: "A General Automated Machine Learning framework to simplify the development of End-to-end AutoML toolkits in specific domains." 
} + repositories: { + url: "https://github.com/TreeLimes/QANAS" + owner: "TreeLimes" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/YaCpotato/deepaugmentFix" + owner: "YaCpotato" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/abcp4/DAPytorch" + owner: "abcp4" + framework: FRAMEWORK_PYTORCH + } } video: { video_id: "XP3vyVrrt3Q" video_title: "PR-017: Neural Architecture Search with Reinforcement Learning" number_of_likes: 31 - number_of_views: 3976 + number_of_views: 4008 published_date: { seconds: 1497796191 } @@ -1742,18 +1757,6 @@ pr_id_to_video: { authors: "Razvan Pascanu" authors: "Peter Battaglia" authors: "Timothy Lillicrap" - repositories: { - url: "https://github.com/jaehyunnn/RelationalNetwork_pytorch" - owner: "jaehyunnn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "An un-official implementation of Relational Network [A. Santoro et al., 2017] (PyTorch) " - } - repositories: { - url: "https://github.com/ttok0s7u2n5/ML2_proj" - owner: "ttok0s7u2n5" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/moduIo/Relation-Networks" owner: "moduIo" @@ -1807,12 +1810,26 @@ pr_id_to_video: { number_of_stars: 5 description: " Research presented at the NIPs 2017 ViGiL Workshop." } + repositories: { + url: "https://github.com/kimhc6028/relational-networks" + owner: "kimhc6028" + framework: FRAMEWORK_PYTORCH + number_of_stars: 781 + description: "Pytorch implementation of \"A simple neural network module for relational reasoning\" (Relational Networks)" + } + repositories: { + url: "https://github.com/cnichkawde/MatchingNetwork" + owner: "cnichkawde" + framework: FRAMEWORK_OTHERS + number_of_stars: 50 + description: "Implementation of \"Matching Networks for One Shot Learning\" in Keras https://arxiv.org/abs/1606.04080 " + } } video: { video_id: "Lb1PVpFp9F8" video_title: "PR-018: A Simple Neural Network Module for Relational Reasoning (DeepMind)" number_of_likes: 63 - number_of_views: 6786 + number_of_views: 6815 published_date: { seconds: 1498432650 } @@ -1841,72 +1858,72 @@ pr_id_to_video: { authors: "David Silver" authors: "Daan Wierstra" repositories: { - url: "https://github.com/Brook1711/RIS_components" - owner: "Brook1711" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "components of RIS simulations" - } - repositories: { - url: "https://github.com/rikluost/RL_DQN_Pong" - owner: "rikluost" + url: "https://github.com/xyshadow/baseline_ddpg" + owner: "xyshadow" framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 - description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" + description: "baseline DDPG implementation less than 400 lines" } repositories: { - url: "https://github.com/Medabid1/RL_Project" - owner: "Medabid1" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "My Deep learning project : Training a robot in MuJoCo with RL" + url: "https://github.com/parilo/rl-server" + owner: "parilo" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 13 + description: "Reinforcement Learning Server" } repositories: { - url: "https://github.com/flavioschneider/ml_papers_presentations" - owner: "flavioschneider" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 + url: "https://github.com/tegg89/magnn" + owner: "tegg89" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 95 + description: "MAGNet: Multi-agents control using Graph Neural Networks" } repositories: { - url: 
"https://github.com/KelvinYang0320/deepbots-panda" - owner: "KelvinYang0320" + url: "https://github.com/krasing/DRLearningContinuousControl" + owner: "krasing" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Panda with Deep Reinforcement Learning Simulation Environment Webots" + number_of_stars: 2 + description: "Project with Udacity" } repositories: { - url: "https://github.com/wpiszlogin/driver_critic" - owner: "wpiszlogin" + url: "https://github.com/Souphis/mobile_robot_rl" + owner: "Souphis" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Solution for CarRacing-v0 environment from OpenAI Gym. It uses the Deep Deterministic Policy Gradient algorithm." + number_of_stars: 11 + description: "Mobile robot control in V-REP using Deep Reinforcement Learning Algorithms." } repositories: { - url: "https://github.com/backgom2357/Recommender_system_via_deep_RL" - owner: "backgom2357" + url: "https://github.com/liampetti/DDPG" + owner: "liampetti" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "The implemetation of Deep Reinforcement Learning based Recommender System from the paper Deep Reinforcement Learning based Recommendation with Explicit User-Item Interactions Modeling by Liu et al." + number_of_stars: 59 + description: "Implementation of DDPG (Modified from the work of Patrick Emami) - Tensorflow (no TFLearn dependency), Ornstein Uhlenbeck noise function, reward discounting, works on discrete & continuous action spaces" } repositories: { - url: "https://github.com/SarodYatawatta/smart-calibration" - owner: "SarodYatawatta" + url: "https://github.com/dpoulopoulos/drl_collaborate_compete" + owner: "dpoulopoulos" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Deep reinforcement learning for smart calibration of radio telescopes. Automatic hyper-parameter tuning." + description: "Two Deep Reinforcement Learning agents that collaborate so as to learn to play a game of tennis." 
} repositories: { - url: "https://github.com/dchetelat/acer" - owner: "dchetelat" - framework: FRAMEWORK_PYTORCH - number_of_stars: 17 - description: "PyTorch implementation of both discrete and continuous ACER" + url: "https://github.com/WittmannF/quadcopter-best-practices" + owner: "WittmannF" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "[Unofficial] Udacity's How to Train a Quadcopter Best Practices" } repositories: { - url: "https://github.com/DanielLSM/safe-rl-tutorial" - owner: "DanielLSM" + url: "https://github.com/rikluost/RL_DQN_Pong" + owner: "rikluost" framework: FRAMEWORK_TENSORFLOW - description: "Just a mini tutorial on safe rl" + number_of_stars: 1 + description: "Tackling Atari 2600 game Pong with Reinforcement Learning by utilizing DQN and TF-Agents" + } + repositories: { + url: "https://github.com/Sheldonmao/Learning-To-walk" + owner: "Sheldonmao" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } methods: { name: "Dense Connections" @@ -1958,7 +1975,7 @@ pr_id_to_video: { video_id: "h2WSVBAC1t4" video_title: "PR-019: Continuous Control with Deep Reinforcement Learning" number_of_likes: 52 - number_of_views: 5369 + number_of_views: 5406 published_date: { seconds: 1498452479 } @@ -1982,41 +1999,6 @@ pr_id_to_video: { authors: "Xiangyu Zhang" authors: "Shaoqing Ren" authors: "Jian Sun" - repositories: { - url: "https://github.com/phogbinh/handwritten-digit-recognition" - owner: "phogbinh" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/ihsuy/Train-by-Reconnect" - owner: "ihsuy" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Official code for the NeurIPS 2020 paper Train by Reconnect: Decoupling Locations of Weights from Their Values by Yushi Qiu and Reiji Suda." - } - repositories: { - url: "https://github.com/AnzorGozalishvili/autoencoders_playground" - owner: "AnzorGozalishvili" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Playing with several types of autoencoders with supervised, unsupervised and semi-supervised learning objectives." - } - repositories: { - url: "https://github.com/hamiddimyati/dd2424-deep-learning" - owner: "hamiddimyati" - framework: FRAMEWORK_OTHERS - description: "All codes and reports for assignments of deep learning course" - } - repositories: { - url: "https://github.com/krish-pinninti/api-ann-python" - owner: "krish-pinninti" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/LiamLau1/MLDE" - owner: "LiamLau1" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/MrtnMndt/Rethinking_CNN_Layerwise_Feature_Amounts" owner: "MrtnMndt" @@ -2044,6 +2026,45 @@ pr_id_to_video: { number_of_stars: 3 description: "Recordings of my research navigation, including paper/book reading notes and related implementations" } + repositories: { + url: "https://github.com/sudhirk999/ResearchPapersLinks" + owner: "sudhirk999" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/hwalsuklee/tensorflow-mnist-MLP-batch_normalization-weight_initializers" + owner: "hwalsuklee" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 49 + description: "MNIST classification using Multi-Layer Perceptron (MLP) with 2 hidden layers. Some weight-initializers and batch-normalization are implemented." 
+ } + repositories: { + url: "https://github.com/eatamath/metallic" + owner: "eatamath" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + url: "https://github.com/AnzorGozalishvili/autoencoders_playground" + owner: "AnzorGozalishvili" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Playing with several types of autoencoders with supervised, unsupervised and semi-supervised learning objectives." + } + repositories: { + url: "https://github.com/zonetrooper32/VDCNN" + owner: "zonetrooper32" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 162 + description: "Implementation of Very Deep Convolutional Neural Network for Text Classification" + } + repositories: { + url: "https://github.com/ihsuy/Train-by-Reconnect" + owner: "ihsuy" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Official code for the NeurIPS 2020 paper Train by Reconnect: Decoupling Locations of Weights from Their Values by Yushi Qiu and Reiji Suda." + } methods: { name: "PReLU" full_name: "Parameterized ReLU" @@ -2098,8 +2119,8 @@ pr_id_to_video: { video: { video_id: "absOinFeGv0" video_title: "PR-020: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification" - number_of_likes: 14 - number_of_views: 1823 + number_of_likes: 15 + number_of_views: 1842 published_date: { seconds: 1499002058 } @@ -2122,72 +2143,73 @@ pr_id_to_video: { authors: "Sergey Ioffe" authors: "Christian Szegedy" repositories: { - url: "https://github.com/ThanasisMattas/smartflow" - owner: "ThanasisMattas" - framework: FRAMEWORK_TENSORFLOW - description: "A Deep Learning solver for the Shallow Water Equations" + url: "https://github.com/Sakib1263/1DResNet-Builder-KERAS" + owner: "Sakib1263" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Models supported: ResNet18, ResNet34, ResNet50, ResNet101, ResNet 152 (1D and 2D versions with DEMO for Classification and Regression)." } repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/deeplab" - owner: "research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 - description: "Models and examples built with TensorFlow" + url: "https://github.com/Sakib1263/1DResNet-KERAS" + owner: "Sakib1263" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Models supported: ResNet18, ResNet34, ResNet50, ResNet101, ResNet 152 (1D and 2D versions with DEMO for Classification and Regression)." } repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/slim" - owner: "research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 - description: "Models and examples built with TensorFlow" + url: "https://github.com/Sakib1263/ResNet1D-Model-Builder-KERAS" + owner: "Sakib1263" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Models supported: ResNet18, ResNet34, ResNet50, ResNet101, ResNet 152 (1D and 2D versions with DEMO for Classification and Regression)." 
} repositories: { url: "https://github.com/tensorflow/models/tree/master/research/seq_flow_lite" owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70576 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/simo-bat/Crack_detection" - owner: "simo-bat" + url: "https://github.com/tensorflow/models/tree/master/research/deeplab" + owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 + number_of_stars: 70934 + description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/dodoproptit99/deep-speaker" - owner: "dodoproptit99" + url: "https://github.com/LouisFoucard/StereoConvNet" + owner: "LouisFoucard" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Speaker identification with Deep Speaker" - } - repositories: { - url: "https://github.com/KushajveerSingh/SPADE-PyTorch" - owner: "KushajveerSingh" - framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "PyTorch unofficial implementation of Semantic Image Synthesis with Spatially-Adaptive Normalization paper by Nvidia Research" + number_of_stars: 149 + description: "Stereo convolutional neural network for depth map prediction from stereo images" } repositories: { - url: "https://github.com/sayakpaul/Adaptive-Gradient-Clipping" - owner: "sayakpaul" + url: "https://github.com/Liuyubao/transfer-learning" + owner: "Liuyubao" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 58 - description: "Minimal implementation of adaptive gradient clipping (https://arxiv.org/abs/2102.06171) in TensorFlow 2. " + number_of_stars: 3 + description: "Transfer learning in Practice" } repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" + url: "https://github.com/lim0606/caffe-googlenet-bn" + owner: "lim0606" framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" + number_of_stars: 130 + description: "re-implementation of googlenet batch normalization" } repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/normalization/batch_norm" - owner: "normalization" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." 
+    url: "https://github.com/Stick-To/Inception"
+    owner: "Stick-To"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 6
+    description: "inception in tensorflow"
+  }
+  repositories: {
+    url: "https://github.com/gmouzella/Efective_TensorFlow"
+    owner: "gmouzella"
+    framework: FRAMEWORK_TENSORFLOW
+    description: "The Portuguese translation of the article \"Tutorial Efective TensorFlow\", originally written by Vahid Kazemi (vahidk)"
   }
   methods: {
     name: "Softmax"
@@ -2243,8 +2265,8 @@ pr_id_to_video: {
 video: {
   video_id: "TDx8iZHwFtM"
   video_title: "PR-021: Batch Normalization (language: korean)"
-  number_of_likes: 103
-  number_of_views: 8073
+  number_of_likes: 104
+  number_of_views: 8118
   published_date: {
     seconds: 1499004604
   }
@@ -2271,65 +2293,71 @@ pr_id_to_video: {
   authors: "Ilya Sutskever"
   authors: "Pieter Abbeel"
   repositories: {
-    url: "https://github.com/yashgarg98/GAN"
-    owner: "yashgarg98"
-    framework: FRAMEWORK_OTHERS
-    description: "Some implementations of Generative Adversarial Networks.(DCGAN, InfoGAN)"
+    url: "https://github.com/SeonbeomKim/TensorFlow-InfoGAN"
+    owner: "SeonbeomKim"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 3
+    description: "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets"
   }
   repositories: {
-    url: "https://github.com/chandragupta0001/GAN/tree/master/info_gan"
-    owner: "master"
-    framework: FRAMEWORK_OTHERS
+    url: "https://github.com/sidneyp/bidirectional"
+    owner: "sidneyp"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 6
+    description: "Complete project for paper \"Bidirectional Learning for Robust Neural Networks\""
   }
   repositories: {
-    url: "https://github.com/elingaard/infogan-mnist"
-    owner: "elingaard"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 1
-    description: "PyTorch implementation of InfoGAN"
+    url: "https://github.com/Murali81/InfoGAN"
+    owner: "Murali81"
+    framework: FRAMEWORK_OTHERS
+    description: "A demo script explaining InfoGAN on MNIST Dataset"
   }
   repositories: {
-    url: "https://github.com/amiryanj/socialways"
-    owner: "amiryanj"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 87
-    description: "Social Ways: Learning Multi-Modal Distributions of Pedestrian Trajectories with GANs (CVPR 2019)"
+    url: "https://github.com/jonasz/progressive_infogan"
+    owner: "jonasz"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 183
+    description: "Progressive training of GANs with Mutual Information Penalty"
   }
   repositories: {
-    url: "https://github.com/Neptune-Trojans/GANs"
-    owner: "Neptune-Trojans"
+    url: "https://github.com/VitoRazor/Gan_Architecture"
+    owner: "VitoRazor"
     framework: FRAMEWORK_TENSORFLOW
-    description: "Implementation of different GANs architectures"
+    number_of_stars: 1
   }
   repositories: {
-    url: "https://github.com/zcemycl/Matlab-GAN"
-    owner: "zcemycl"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 78
-    description: "MATLAB implementations of Generative Adversarial Networks -- from GAN to Pixel2Pixel, CycleGAN"
+    url: "https://github.com/jeanjerome/semisupervised_timeseries_infogan"
+    owner: "jeanjerome"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 15
+    description: "A tensorflow implementation of informative generative adversarial network (InfoGAN ) to one dimensional ( 1D ) time series data with a supervised loss function. So it's called semisupervised Info GAN."
   }
   repositories: {
-    url: "https://github.com/Evavanrooijen/InfoGAN-PyTorch"
-    owner: "Evavanrooijen"
-    framework: FRAMEWORK_PYTORCH
+    url: "https://github.com/openai/InfoGAN"
+    owner: "openai"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 965
+    description: "Code for reproducing key results in the paper \"InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets\""
   }
   repositories: {
-    url: "https://github.com/inkplatform/InfoGAN-PyTorch"
-    owner: "inkplatform"
-    framework: FRAMEWORK_PYTORCH
-    description: "code for InfoGAN"
+    url: "https://github.com/landeros10/infoganJL"
+    owner: "landeros10"
+    framework: FRAMEWORK_OTHERS
+    description: "InfoGAN implementation in Julia with flexible nn architectures for generator and discriminator"
   }
   repositories: {
-    url: "https://github.com/vinoth654321/Casia-Webface"
-    owner: "vinoth654321"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 1
+    url: "https://github.com/buriburisuri/timeseries_gan"
+    owner: "buriburisuri"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 267
+    description: "A tensorflow implementation of GAN ( exactly InfoGAN or Info GAN ) to one dimensional ( 1D ) time series data."
   }
   repositories: {
-    url: "https://github.com/bacdavid/InfomaxVAE"
-    owner: "bacdavid"
-    framework: FRAMEWORK_OTHERS
-    description: "Obtain the latent variables that contain the maximal mutual information."
+    url: "https://github.com/Natsu6767/InfoGAN-PyTorch"
+    owner: "Natsu6767"
+    framework: FRAMEWORK_PYTORCH
+    number_of_stars: 221
+    description: "PyTorch Implementation of InfoGAN"
   }
   methods: {
     name: "Batch Normalization"
@@ -2385,8 +2413,8 @@ pr_id_to_video: {
 video: {
   video_id: "_4jbgniqt_Q"
   video_title: "PR-022: InfoGAN (OpenAI)"
-  number_of_likes: 42
-  number_of_views: 5941
+  number_of_likes: 43
+  number_of_views: 5988
   published_date: {
     seconds: 1499608297
   }
@@ -2409,92 +2437,69 @@ pr_id_to_video: {
   authors: "Joseph Redmon"
   authors: "Ali Farhadi"
   repositories: {
-    url: "https://github.com/westerndigitalcorporation/YOLOv3-in-PyTorch"
-    owner: "westerndigitalcorporation"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 84
-    description: "YOLOv3 in PyTorch with training and inference module implemented." 
-  }
-  repositories: {
-    url: "https://github.com/drscotthawley/SPNet"
-    owner: "drscotthawley"
+    url: "https://github.com/Sushma07/dancedarknet"
+    owner: "Sushma07"
     framework: FRAMEWORK_TENSORFLOW
-    number_of_stars: 1
-    description: "Object detection for ESPI images of oscillating steelpan drums"
+    description: "Retraining "
   }
   repositories: {
-    url: "https://github.com/Qengineering/YoloV2-ncnn-Jetson-Nano"
-    owner: "Qengineering"
+    url: "https://github.com/yuliani29/yolotraining"
+    owner: "yuliani29"
     framework: FRAMEWORK_OTHERS
-    number_of_stars: 2
   }
   repositories: {
-    url: "https://github.com/Qengineering/YoloV2-ncnn-Raspberry-Pi-4"
-    owner: "Qengineering"
+    url: "https://github.com/rnirdhar/yoloTestOneClass"
+    owner: "rnirdhar"
     framework: FRAMEWORK_OTHERS
-    number_of_stars: 2
-    description: "YoloV2 for bare Raspberry Pi 4"
   }
   repositories: {
-    url: "https://github.com/benjamintli/darknet-gun-detector"
-    owner: "benjamintli"
-    framework: FRAMEWORK_TENSORFLOW
+    url: "https://github.com/bobby20180331/darknet_pycharm"
+    owner: "bobby20180331"
+    framework: FRAMEWORK_OTHERS
     number_of_stars: 1
+    description: "pycharm"
   }
   repositories: {
-    url: "https://github.com/DavianYang/yolo.ai"
-    owner: "DavianYang"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 1
-    description: "Yolo Implementation (v1, v2, v3)"
+    url: "https://github.com/KingBoyBIT/yolov3test"
+    owner: "KingBoyBIT"
+    framework: FRAMEWORK_OTHERS
   }
   repositories: {
-    url: "https://github.com/preste-nakam/AI_whiteboard"
-    owner: "preste-nakam"
-    framework: FRAMEWORK_TENSORFLOW
-    number_of_stars: 6
-    description: "The system which helps to transform any wall or flat surface into an interactive whiteboard just with an ordinary RGB camera and a hand! "
+    url: "https://github.com/darshans0200/YOLOTest"
+    owner: "darshans0200"
+    framework: FRAMEWORK_OTHERS
+    description: "test"
   }
   repositories: {
-    url: "https://github.com/preste-ai/camera_ai_whiteboard"
-    owner: "preste-ai"
+    url: "https://github.com/jianing-sun/Mask-YOLO"
+    owner: "jianing-sun"
     framework: FRAMEWORK_TENSORFLOW
-    number_of_stars: 8
-    description: "Transform any wall to an intelligent whiteboard"
+    number_of_stars: 92
+    description: "Inspired from Mask R-CNN to build a multi-task learning, two-branch architecture: one branch based on YOLOv2 for object detection, the other branch for instance segmentation. Simply tested on Rice and Shapes. MobileNet supported."
   }
   repositories: {
-    url: "https://gitlab.com/eavise/lightnet"
-    owner: "eavise"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 56
-    description: "Darknet got illuminated by PyTorch ~ Meet Lightnet"
+    url: "https://github.com/williamccondori/YOLO-NFPA"
+    owner: "williamccondori"
+    framework: FRAMEWORK_OTHERS
+    number_of_stars: 1
+    description: "A project to detect NFPA symbols"
   }
   repositories: {
-    url: "https://github.com/Vijayabhaskar96/Object-Detection-Algorithms"
-    owner: "Vijayabhaskar96"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 6
-    description: "My Implementations of Popular Object detection algorithms in Pytorch." 
-  }
-  methods: {
-    name: "Step Decay"
-    full_name: "Step Decay"
-    description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)"
-  }
-  methods: {
-    name: "SGD with Momentum"
-    full_name: "SGD with Momentum"
-    description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)"
+    url: "https://github.com/toufiksk/darknet"
+    owner: "toufiksk"
+    framework: FRAMEWORK_OTHERS
   }
-  methods: {
-    name: "Darknet-19"
-    full_name: "Darknet-19"
-    description: "**Darknet-19** is a convolutional neural network that is used as the backbone of [YOLOv2](https://paperswithcode.com/method/yolov2). Similar to the [VGG](https://paperswithcode.com/method/vgg) models it mostly uses $3 \\times 3$ filters and doubles the number of channels after every pooling step. Following the work on Network in Network (NIN) it uses global average pooling to make predictions as well as $1 \\times 1$ filters to compress the feature representation between $3 \\times 3$ convolutions. Batch Normalization is used to stabilize training, speed up convergence, and regularize the model batch."
+  repositories: {
+    url: "https://github.com/kirilcvetkov92/Vehicle-Detection"
+    owner: "kirilcvetkov92"
+    framework: FRAMEWORK_OTHERS
+    number_of_stars: 24
+    description: "Vehicle detection implemented with You Only Look Once. It's an object detector that uses features learned by a deep convolutional neural network to detect an object."
   }
   methods: {
-    name: "ColorJitter"
-    full_name: "Color Jitter"
-    description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)"
+    name: "Non Maximum Suppression"
+    full_name: "Non Maximum Suppression"
+    description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)"
   }
   methods: {
     name: "Softmax"
@@ -2506,31 +2511,47 @@ pr_id_to_video: {
     full_name: "Weight Decay"
     description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al"
   }
+  methods: {
+    name: "Darknet-19"
+    full_name: "Darknet-19"
+    description: "**Darknet-19** is a convolutional neural network that is used as the backbone of [YOLOv2](https://paperswithcode.com/method/yolov2). Similar to the [VGG](https://paperswithcode.com/method/vgg) models it mostly uses $3 \\times 3$ filters and doubles the number of channels after every pooling step. Following the work on Network in Network (NIN) it uses global average pooling to make predictions as well as $1 \\times 1$ filters to compress the feature representation between $3 \\times 3$ convolutions. Batch Normalization is used to stabilize training, speed up convergence, and regularize the model."
+  }
   methods: {
     name: "Random Resized Crop"
     full_name: "Random Resized Crop"
     description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)"
   }
   methods: {
-    name: "YOLOv2"
-    full_name: "YOLOv2"
-    description: "**YOLOv2**, or [**YOLO9000**](https://www.youtube.com/watch?v=QsDDXSmGJZA), is a single-stage real-time object detection model. It improves upon [YOLOv1](https://paperswithcode.com/method/yolov1) in several ways, including the use of Darknet-19 as a backbone, batch normalization, use of a high-resolution classifier, and the use of anchor boxes to predict bounding boxes, and more."
+    name: "ColorJitter"
+    full_name: "Color Jitter"
+    description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)"
   }
   methods: {
-    name: "Fast-YOLOv2"
-    full_name: "Fast-YOLOv2"
+    name: "Step Decay"
+    full_name: "Step Decay"
+    description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)"
+  }
+  methods: {
+    name: "SGD with Momentum"
+    full_name: "SGD with Momentum"
+    description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as a ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)"
   }
   methods: {
     name: "Polynomial Rate Decay"
     full_name: "Polynomial Rate Decay"
     description: "**Polynomial Rate Decay** is a learning rate schedule where we polynomially decay the learning rate."
   }
+  methods: {
+    name: "YOLOv2"
+    full_name: "YOLOv2"
+    description: "**YOLOv2**, or [**YOLO9000**](https://www.youtube.com/watch?v=QsDDXSmGJZA), is a single-stage real-time object detection model. It improves upon [YOLOv1](https://paperswithcode.com/method/yolov1) in several ways, including the use of Darknet-19 as a backbone, batch normalization, use of a high-resolution classifier, and the use of anchor boxes to predict bounding boxes, and more."
+  }
 }
 video: {
   video_id: "6fdclSGgeio"
   video_title: "PR-023: YOLO9000: Better, Faster, Stronger"
   number_of_likes: 96
-  number_of_views: 12624
+  number_of_views: 12799
   published_date: {
     seconds: 1500299473
   }
@@ -2554,61 +2575,68 @@ pr_id_to_video: {
   authors: "Nal Kalchbrenner"
   authors: "Koray Kavukcuoglu"
   repositories: {
-    url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/autoregressive/pixel_cnn.py"
-    owner: "autoregressive"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 156
-    description: "Easy generative modeling in PyTorch." 
+    url: "https://github.com/tccnchsu/Artifical_Intelegent"
+    owner: "tccnchsu"
+    framework: FRAMEWORK_TENSORFLOW
   }
   repositories: {
-    url: "https://github.com/kamenbliznashki/pixel_models"
-    owner: "kamenbliznashki"
+    url: "https://github.com/eyalbetzalel/pytorch-generative-v2"
+    owner: "eyalbetzalel"
     framework: FRAMEWORK_PYTORCH
-    number_of_stars: 18
-    description: "Pytorch implementations of autoregressive pixel models - PixelCNN, PixelCNN++, PixelSNAIL"
   }
   repositories: {
-    url: "https://github.com/eyalbetzalel/pytorch-generative-v6"
-    owner: "eyalbetzalel"
-    framework: FRAMEWORK_PYTORCH
+    url: "https://github.com/vocong25/gated_pixelcnn"
+    owner: "vocong25"
+    framework: FRAMEWORK_TENSORFLOW
   }
   repositories: {
-    url: "https://github.com/eyalbetzalel/pytorch-generative-v2"
-    owner: "eyalbetzalel"
+    url: "https://github.com/arcelien/hawc-deep-learning"
+    owner: "arcelien"
     framework: FRAMEWORK_PYTORCH
+    number_of_stars: 5
+    description: "Reproducing physics simulations on HAWC data with deep learning"
   }
   repositories: {
-    url: "https://github.com/davidemartinelli/PixelCNN"
-    owner: "davidemartinelli"
+    url: "https://github.com/singh-hrituraj/PixelCNN-Pytorch"
+    owner: "singh-hrituraj"
     framework: FRAMEWORK_PYTORCH
+    number_of_stars: 28
+    description: "A naive implementation of PixelCNN in Pytorch as described in A. Oord et al."
   }
   repositories: {
-    url: "https://github.com/doiodl/pixelcnn-rnn"
-    owner: "doiodl"
-    framework: FRAMEWORK_TENSORFLOW
-    description: "Реализация генеративных сетей PixelCNN и PixelRNN по оф. статье:https://arxiv.org/pdf/1601.06759.pdf . Стэк технологий: python, tensorflow и keras. Весь код был написан на google colab с tf 2.0"
+    url: "https://github.com/ardapekis/pixel-rnn"
+    owner: "ardapekis"
+    framework: FRAMEWORK_PYTORCH
+    number_of_stars: 2
+    description: "Implementation based on the 2016 Pixel-RNN Paper"
   }
   repositories: {
-    url: "https://github.com/eyalbetzalel/pytorch-generative"
-    owner: "eyalbetzalel"
+    url: "https://github.com/anordertoreclaim/PixelCNN"
+    owner: "anordertoreclaim"
     framework: FRAMEWORK_PYTORCH
+    number_of_stars: 23
+    description: "PyTorch implementation of gated PixelCNN"
   }
   repositories: {
-    url: "https://github.com/tccnchsu/Artifical_Intelegent"
-    owner: "tccnchsu"
+    url: "https://github.com/rampage644/wavenet"
+    owner: "rampage644"
     framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 54
+    description: "WaveNet implementation with chainer"
   }
   repositories: {
-    url: "https://github.com/vocong25/gated_pixelcnn"
-    owner: "vocong25"
-    framework: FRAMEWORK_TENSORFLOW
+    url: "https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/autoregressive/pixel_cnn.py"
+    owner: "autoregressive"
+    framework: FRAMEWORK_PYTORCH
+    number_of_stars: 167
+    description: "Easy generative modeling in PyTorch."
   }
   repositories: {
-    url: "https://github.com/arcelien/hawc-deep-learning"
-    owner: "arcelien"
-    framework: FRAMEWORK_PYTORCH
-    number_of_stars: 5
-    description: "Reproducing physics simulations on HAWC data with deep learning"
+    url: "https://github.com/sarus-tech/tf2-published-models/tree/master/pixelcnn"
+    owner: "master"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 33
+    description: "Sarus implementation of classical ML models. The models are implemented using the Keras API of tensorflow 2. Visualizations are implemented and can be seen in TensorBoard."
   }
   methods: {
     name: "Masked Convolution"
@@ -2640,7 +2668,7 @@ pr_id_to_video: {
   video_id: "BvcwEz4VPIQ"
   video_title: "PR-024: Pixel Recurrent Neural Network"
   number_of_likes: 49
-  number_of_views: 5563
+  number_of_views: 5622
   published_date: {
     seconds: 1502156580
   }
@@ -2668,7 +2696,7 @@ pr_id_to_video: {
   video_id: "KdRo7ATNs9g"
   video_title: "PR-025: Learning with side information through modality hallucination (2016)"
   number_of_likes: 18
-  number_of_views: 1831
+  number_of_views: 1841
   published_date: {
     seconds: 1500818803
   }
@@ -2692,65 +2720,67 @@ pr_id_to_video: {
   authors: "Philipp Fischer"
   authors: "Thomas Brox"
   repositories: {
-    url: "https://github.com/sowit-labs/grassUNet"
-    owner: "sowit-labs"
+    url: "https://github.com/Sakib1263/UNet-UNetEnsembled-UNetPlus-UNetPlusPlus-MultiResUNet-2D-Segmentation-Model-Builder-KERAS"
+    owner: "Sakib1263"
     framework: FRAMEWORK_OTHERS
-    number_of_stars: 4
+    number_of_stars: 1
+    description: "2D Segmentation Models Supported: UNet, UNet-Ensembled, UNet+, UNet++, MultiResUNet with optional Deep Supervision and Autoencoder modes."
   }
   repositories: {
-    url: "https://github.com/samson6460/tf2_Segmentation"
-    owner: "samson6460"
+    url: "https://github.com/Sakib1263/UNet2D-Segmentation-Model-Builder-KERAS"
+    owner: "Sakib1263"
     framework: FRAMEWORK_TENSORFLOW
-    number_of_stars: 2
-    description: "Segmentation framework implemented by tensorflow 2."
+    number_of_stars: 1
+    description: "2D Segmentation Models Supported: UNet, UNet-Ensembled, UNet+, UNet++, MultiResUNet with optional Deep Supervision and Autoencoder modes."
   }
   repositories: {
-    url: "https://github.com/mateuszbuda/brain-segmentation-pytorch"
-    owner: "mateuszbuda"
+    url: "https://github.com/park-cheol/Speech_Enhancement-DCUnet"
+    owner: "park-cheol"
     framework: FRAMEWORK_PYTORCH
-    number_of_stars: 415
-    description: "U-Net implementation in PyTorch for FLAIR abnormality segmentation in brain MRI"
+    number_of_stars: 1
   }
   repositories: {
-    url: "https://github.com/Agasanli12/Road-Extraction-with-ResUnet"
-    owner: "Agasanli12"
+    url: "https://github.com/GeoDQ/Seismic-image-segmentation"
+    owner: "GeoDQ"
     framework: FRAMEWORK_OTHERS
-    number_of_stars: 2
   }
   repositories: {
-    url: "https://github.com/salem-devloper/final"
-    owner: "salem-devloper"
-    framework: FRAMEWORK_PYTORCH
+    url: "https://github.com/GeoDQ/ImageSegmentation_Unet"
+    owner: "GeoDQ"
+    framework: FRAMEWORK_OTHERS
   }
   repositories: {
-    url: "https://github.com/GewelsJI/PNS-Net"
-    owner: "GewelsJI"
+    url: "https://github.com/mateuszbuda/brain-segmentation-pytorch"
+    owner: "mateuszbuda"
     framework: FRAMEWORK_PYTORCH
-    number_of_stars: 21
-    description: "2021-MICCAI-Progressively Normalized Self-Attention Network for Video Polyp Segmentation"
+    number_of_stars: 428
+    description: "U-Net implementation in PyTorch for FLAIR abnormality segmentation in brain MRI"
   }
   repositories: {
-    url: "https://github.com/BioWar/Satellite-Image-Segmentation-using-Deep-Learning-for-Deforestation-Detection"
-    owner: "BioWar"
-    framework: FRAMEWORK_TENSORFLOW
-    number_of_stars: 1
+    url: "https://github.com/jvanvugt/pytorch-unet"
+    owner: "jvanvugt"
+    framework: FRAMEWORK_PYTORCH
+    number_of_stars: 241
+    description: "Tunable U-Net implementation in PyTorch"
   }
   repositories: {
-    url: "https://github.com/MargeryLab/Pytorch-UNet-copy"
-    owner: "MargeryLab"
+    url: "https://github.com/trichtu/Recurrent_Attention_U_net"
+    owner: "trichtu"
     framework: FRAMEWORK_PYTORCH
+    number_of_stars: 2
+    description: "spatial-temporal rain prediction/segmentation"
   }
   repositories: {
-    url: 
"https://github.com/salem-devloper/COVID-Lung-Segment" - owner: "salem-devloper" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/TobiasGruening/ARU-Net" + owner: "TobiasGruening" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 119 } repositories: { - url: "https://github.com/taha7ussein007/Papers_Implementation/tree/main/Paper_Implementation_From_Scratch/UNet_FromScratch_Pytorch" - owner: "Paper_Implementation_From_Scratch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "In this repo i'm going to practice implementing research, academic and business papers." + url: "https://github.com/mariosfourn/ScienceBowl" + owner: "mariosfourn" + framework: FRAMEWORK_TENSORFLOW + description: "2018 Data Science Bowl" } methods: { name: "Convolution" @@ -2782,7 +2812,7 @@ pr_id_to_video: { video_id: "ZdPBkPGfRSk" video_title: "PR-026: Notes for CVPR Machine Learning Session" number_of_likes: 9 - number_of_views: 1505 + number_of_views: 1514 published_date: { seconds: 1501469470 } @@ -2813,8 +2843,8 @@ pr_id_to_video: { video: { video_id: "uZ2GtEe-50E" video_title: "PR-027:GloVe - Global vectors for word representation" - number_of_likes: 65 - number_of_views: 4349 + number_of_likes: 67 + number_of_views: 4385 published_date: { seconds: 1502026123 } @@ -2842,67 +2872,67 @@ pr_id_to_video: { url: "https://github.com/pytorch/vision" owner: "pytorch" framework: FRAMEWORK_PYTORCH - number_of_stars: 9433 + number_of_stars: 9607 description: "Datasets, Transforms and Models specific to Computer Vision" } repositories: { - url: "https://github.com/Duplums/bhb10k-dl-benchmark" - owner: "Duplums" + url: "https://github.com/andreasveit/densenet-pytorch" + owner: "andreasveit" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A Reproducible Benchmark for CNN Models on the BHB-10K Dataset" + number_of_stars: 365 + description: "A PyTorch Implementation for Densely Connected Convolutional Networks (DenseNets)" } repositories: { - url: "https://github.com/priyavrat-misra/xrays-and-gradcam" - owner: "priyavrat-misra" + url: "https://github.com/muditrastogi/chestai" + owner: "muditrastogi" framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "Classification and Gradient-based Localization of Chest Radiographs using PyTorch." + number_of_stars: 1 + description: "This repo is dedicated to prepare an automatic chest disease classification using deep learning." } repositories: { - url: "https://github.com/cmasch/densenet" - owner: "cmasch" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 38 - description: "Implementation of Densely Connected Convolutional Network with Keras and TensorFlow." + url: "https://github.com/fengjiqiang/pretrainedmodel_pytorch" + owner: "fengjiqiang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "pretrainedmodel_pytorch senet, densenet, dpn" } repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - owner: "lpirola13" + url: "https://github.com/ApexPredator1/DenseNet_tensorflow" + owner: "ApexPredator1" framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." 
} repositories: { - url: "https://github.com/bozliu/E2E-Keyword-Spotting" - owner: "bozliu" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RRoundTable/OceanLitter_dataset_generator" + owner: "RRoundTable" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 - description: "Wake-Up Keyword Detection With End To End Deep Neural Networks" + description: "해양쓰레기 데이터를 생성해주는 Cycle_GAN입니다." } repositories: { - url: "https://github.com/lpirola13/flower_recognizer" - owner: "lpirola13" - framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." + url: "https://github.com/xypan1232/iDeepE" + owner: "xypan1232" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "inferring RBP binding sites and motifs using local and global CNNs" } repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" + url: "https://github.com/jonnor/datascience-master" + owner: "jonnor" framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" + number_of_stars: 12 + description: "Journal/notes/log of my Masters in Data Science degree" } repositories: { - url: "https://github.com/niranjana98/Image-Classification" - owner: "niranjana98" - framework: FRAMEWORK_OTHERS + url: "https://github.com/idobronstein/vision_networks" + owner: "idobronstein" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + url: "https://github.com/seasonyc/densenet" + owner: "seasonyc" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 22 + description: "A keras implementation of DenseNet" } methods: { name: "Nesterov Accelerated Gradient" @@ -2958,8 +2988,8 @@ pr_id_to_video: { video: { video_id: "fe2Vn0mwALI" video_title: "PR-028: Densely Connected Convolutional Networks (CVPR 2017, Best Paper Award) by Gao Huang et al." - number_of_likes: 125 - number_of_views: 12538 + number_of_likes: 126 + number_of_views: 12679 published_date: { seconds: 1502159004 } @@ -2986,7 +3016,7 @@ pr_id_to_video: { video_id: "AXi4s3aFN6M" video_title: "PR-029: Apprenticeship Learning via Inverse Reinforcement Learning" number_of_likes: 16 - number_of_views: 2096 + number_of_views: 2129 published_date: { seconds: 1505165154 } @@ -3017,94 +3047,94 @@ pr_id_to_video: { authors: "Johannes Totz" authors: "Zehan Wang" authors: "Wenzhe Shi" - repositories: { - url: "https://github.com/epochlab/xres" - owner: "epochlab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Super-scale and enhance the quality of low resolution images." - } repositories: { url: "https://github.com/Lornatang/SRGAN_PyTorch" owner: "Lornatang" framework: FRAMEWORK_PYTORCH - number_of_stars: 61 + number_of_stars: 71 description: "A simple and complete implementation of super-resolution paper." } repositories: { url: "https://github.com/Lornatang/SRGAN-PyTorch" owner: "Lornatang" framework: FRAMEWORK_PYTORCH - number_of_stars: 61 + number_of_stars: 71 description: "A simple and complete implementation of super-resolution paper." 
} repositories: { - url: "https://github.com/chaoxu0512/Pushbroom-satellite-image-SRGAN" - owner: "chaoxu0512" + url: "https://github.com/CreativeCodingLab/DeepIllumination" + owner: "CreativeCodingLab" framework: FRAMEWORK_PYTORCH + number_of_stars: 45 + description: "Code and examples from our paper \"Deep Illumination: Approximating Dynamic Global Illumination with Generative Adversarial Networks,\" by Manu Mathew Thomas and Angus Forbes" } repositories: { - url: "https://github.com/omkarghugarkar007/Neural_Super_Sampling" - owner: "omkarghugarkar007" + url: "https://github.com/leftthomas/SRGAN" + owner: "leftthomas" + framework: FRAMEWORK_PYTORCH + number_of_stars: 718 + description: "A PyTorch implementation of SRGAN based on CVPR 2017 paper \"Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network\"" + } + repositories: { + url: "https://github.com/tensorlayer/SRGAN" + owner: "tensorlayer" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "An attempt to upsample images by a factor of 4 using GAN" + number_of_stars: 2568 + description: "Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network" } repositories: { - url: "https://github.com/AntonioAlgaida/Edge.SRGAN" - owner: "AntonioAlgaida" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A solution of SISR that merges the ideas of SRGAN and Edge Informed SISR. This solution was presented on 1st SpainAI hackathon obtain 4th position." + url: "https://github.com/titu1994/Image-Super-Resolution" + owner: "titu1994" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 742 + description: "Implementation of Super Resolution CNN in Keras." } repositories: { - url: "https://github.com/Idelcads/Super_Resolution_overview" - owner: "Idelcads" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/idearibosome/tf-perceptual-eusr" + owner: "idearibosome" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 43 + description: "A TensorFlow-based image super-resolution model considering both quantitative and perceptual quality" } repositories: { - url: "https://github.com/Idelcads/IMKI_Technical_test" - owner: "Idelcads" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/jupiterman/Super-Resolution-Images" + owner: "jupiterman" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 } repositories: { - url: "https://github.com/akanametov/SuperResolution" - owner: "akanametov" + url: "https://github.com/xinntao/BasicSR" + owner: "xinntao" framework: FRAMEWORK_PYTORCH - description: "A SuperResolution GAN trained on STL10 dataset" + number_of_stars: 2191 + description: "Open Source Image and Video Restoration Toolbox for Super-resolution, Denoise, Deblurring, etc. Currently, it includes EDSR, RCAN, SRResNet, SRGAN, ESRGAN, EDVR, etc. Also support StyleGAN2, DFDNet." 
} repositories: { - url: "https://github.com/akanametov/Pix2Pix" - owner: "akanametov" + url: "https://github.com/Lornatang/ESRGAN-PyTorch" + owner: "Lornatang" framework: FRAMEWORK_PYTORCH - description: "A Pytorch implementation of Pix2Pix GAN" - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + number_of_stars: 34 + description: "A simple implementation of esrgan, which uses the pytorch framework." } methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" } methods: { - name: "VGG" - full_name: "VGG" - description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "SRGAN Residual Block" - full_name: "SRGAN Residual Block" - description: "**SRGAN Residual Block** is a residual block used in the [SRGAN](https://paperswithcode.com/method/srgan#) generator for image super-resolution. 
It is similar to standard [residual blocks](https://paperswithcode.com/method/residual-block), although it uses a [PReLU](https://paperswithcode.com/method/prelu) activation function to help training (preventing sparse gradients during GAN training)." + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { name: "VGG Loss" @@ -3117,26 +3147,31 @@ pr_id_to_video: { description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. 
Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + name: "VGG" + full_name: "VGG" + description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" } methods: { name: "SRGAN" full_name: "SRGAN" description: "**SRGAN** is a generative adversarial network for single image super-resolution. It uses a perceptual loss function which consists of an adversarial loss and a content loss. The adversarial loss pushes the solution to the natural image manifold using a discriminator network that is trained to differentiate between the super-resolved images and original photo-realistic images. In addition, the authors use a content loss motivated by perceptual similarity instead of similarity in pixel space. 
The actual networks - depicted in the Figure to the right - consist mainly of residual blocks for feature extraction.\r\n\r\nFormally we write the perceptual loss function as a weighted sum of a (VGG) content loss $l^{SR}\\_{X}$ and an adversarial loss component $l^{SR}\\_{Gen}$:\r\n\r\n$$ l^{SR} = l^{SR}\\_{X} + 10^{-3}l^{SR}\\_{Gen} $$" } + methods: { + name: "PReLU" + full_name: "Parameterized ReLU" + description: "A **Parametric Rectified Linear Unit**, or **PReLU**, is an activation function that generalizes the traditional rectified unit with a slope for negative values. Formally:\r\n\r\n$$f\\left(y\\_{i}\\right) = y\\_{i} \\text{ if } y\\_{i} \\ge 0$$\r\n$$f\\left(y\\_{i}\\right) = a\\_{i}y\\_{i} \\text{ if } y\\_{i} \\leq 0$$\r\n\r\nThe intuition is that different layers may require different types of nonlinearity. Indeed the authors find in experiments with convolutional neural networks that PReLus for the initial layer have more positive slopes, i.e. closer to linear. Since the filters of the first layers are Gabor-like filters such as edge or texture detectors, this shows a circumstance where positive and negative responses of filters are respected. In contrast the authors find deeper layers have smaller coefficients, suggesting the model becomes more discriminative at later layers (while it wants to retain more information at earlier layers)." + } } video: { video_id: "nGPMKnoJTcI" video_title: "PR-030: Photo-Realistic Single Image Super Resolution Using a Generative Adversarial Network" - number_of_likes: 24 - number_of_views: 2827 + number_of_likes: 27 + number_of_views: 2908 published_date: { seconds: 1502636018 } @@ -3176,14 +3211,14 @@ pr_id_to_video: { url: "https://github.com/chenwydj/learning-to-learn-by-gradient-descent-by-gradient-descent" owner: "chenwydj" framework: FRAMEWORK_PYTORCH - number_of_stars: 32 + number_of_stars: 34 description: "Pytorch version of NIPS'16 \"Learning to learn by gradient descent by gradient descent\"" } repositories: { url: "https://github.com/yangsenius/learning-to-learn-by-pytorch" owner: "yangsenius" framework: FRAMEWORK_PYTORCH - number_of_stars: 35 + number_of_stars: 36 description: "\"Learning to learn by gradient descent by gradient descent \"by PyTorch -- a simple re-implementation." } } @@ -3191,7 +3226,7 @@ pr_id_to_video: { video_id: "p55H46RiZ6k" video_title: "PR-031: Learning to learn by gradient descent by gradient descent" number_of_likes: 16 - number_of_views: 2401 + number_of_views: 2417 published_date: { seconds: 1504453983 } @@ -3213,12 +3248,6 @@ pr_id_to_video: { } authors: "Andrej Karpathy" authors: "Li Fei-Fei" - repositories: { - url: "https://github.com/IzabelaKrupinska/PROJBAD" - owner: "IzabelaKrupinska" - framework: FRAMEWORK_OTHERS - description: "Pliki do projektu badawczego." - } repositories: { url: "https://github.com/VinitSR7/Image-Caption-Generation" owner: "VinitSR7" @@ -3226,12 +3255,18 @@ pr_id_to_video: { number_of_stars: 12 description: "Image Captioning: Implementing the Neural Image Caption Generator" } + repositories: { + url: "https://github.com/IzabelaKrupinska/PROJBAD" + owner: "IzabelaKrupinska" + framework: FRAMEWORK_OTHERS + description: "Pliki do projektu badawczego." 
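[Editor's aside — illustrative only, not part of the generated `database.pbtxt` diff.] The PR-030 method notes above quote several formulas: the sigmoid activation, the softmax output, the fixed-slope leaky ReLU versus the learnable-slope PReLU, and SRGAN's perceptual loss $l^{SR} = l^{SR}_X + 10^{-3} l^{SR}_{Gen}$. A minimal, self-contained Go sketch of those formulas (Go to match `dbctl`; all names here are hypothetical):

```go
package main

import (
	"fmt"
	"math"
)

// sigmoid implements f(x) = 1 / (1 + exp(-x)) from the Sigmoid Activation note.
func sigmoid(x float64) float64 {
	return 1 / (1 + math.Exp(-x))
}

// softmax turns raw scores into probabilities, P(y=j|x) ∝ exp(score_j),
// subtracting the max score first for numerical stability.
func softmax(scores []float64) []float64 {
	max := scores[0]
	for _, s := range scores {
		if s > max {
			max = s
		}
	}
	out := make([]float64, len(scores))
	sum := 0.0
	for i, s := range scores {
		out[i] = math.Exp(s - max)
		sum += out[i]
	}
	for i := range out {
		out[i] /= sum
	}
	return out
}

// leakyReLU uses a fixed negative slope a chosen before training;
// PReLU has exactly the same form, but a is learned during training.
func leakyReLU(x, a float64) float64 {
	if x >= 0 {
		return x
	}
	return a * x
}

// srganPerceptualLoss is the weighted sum from the SRGAN note:
// l_SR = l_X (VGG content loss) + 1e-3 * l_Gen (adversarial loss).
func srganPerceptualLoss(contentLoss, adversarialLoss float64) float64 {
	return contentLoss + 1e-3*adversarialLoss
}

func main() {
	fmt.Println(sigmoid(0))                     // 0.5
	fmt.Println(softmax([]float64{1, 2, 3}))    // ≈ [0.09 0.24 0.67]
	fmt.Println(leakyReLU(-2, 0.01))            // -0.02 (PReLU would learn the 0.01)
	fmt.Println(srganPerceptualLoss(0.42, 3.0)) // 0.423
}
```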
@@ -3176,14 +3211,14 @@ pr_id_to_video: {
       url: "https://github.com/chenwydj/learning-to-learn-by-gradient-descent-by-gradient-descent"
       owner: "chenwydj"
       framework: FRAMEWORK_PYTORCH
-      number_of_stars: 32
+      number_of_stars: 34
       description: "Pytorch version of NIPS'16 \"Learning to learn by gradient descent by gradient descent\""
     }
     repositories: {
       url: "https://github.com/yangsenius/learning-to-learn-by-pytorch"
       owner: "yangsenius"
       framework: FRAMEWORK_PYTORCH
-      number_of_stars: 35
+      number_of_stars: 36
       description: "\"Learning to learn by gradient descent by gradient descent \"by PyTorch -- a simple re-implementation."
     }
   }
@@ -3191,7 +3226,7 @@ pr_id_to_video: {
     video_id: "p55H46RiZ6k"
     video_title: "PR-031: Learning to learn by gradient descent by gradient descent"
     number_of_likes: 16
-    number_of_views: 2401
+    number_of_views: 2417
     published_date: {
       seconds: 1504453983
     }
@@ -3213,12 +3248,6 @@ pr_id_to_video: {
     }
     authors: "Andrej Karpathy"
     authors: "Li Fei-Fei"
-    repositories: {
-      url: "https://github.com/IzabelaKrupinska/PROJBAD"
-      owner: "IzabelaKrupinska"
-      framework: FRAMEWORK_OTHERS
-      description: "Pliki do projektu badawczego."
-    }
     repositories: {
       url: "https://github.com/VinitSR7/Image-Caption-Generation"
       owner: "VinitSR7"
@@ -3226,12 +3255,18 @@ pr_id_to_video: {
       number_of_stars: 12
       description: "Image Captioning: Implementing the Neural Image Caption Generator"
     }
+    repositories: {
+      url: "https://github.com/IzabelaKrupinska/PROJBAD"
+      owner: "IzabelaKrupinska"
+      framework: FRAMEWORK_OTHERS
+      description: "Pliki do projektu badawczego."
+    }
   }
   video: {
     video_id: "Q-Cm7nw85iE"
     video_title: "PR-032: Deep Visual-Semantic Alignments for Generating Image Descriptions"
     number_of_likes: 13
-    number_of_views: 2041
+    number_of_views: 2071
     published_date: {
       seconds: 1504445734
     }
@@ -3256,14 +3291,6 @@ pr_id_to_video: {
     authors: "Kye-Hyeon Kim"
     authors: "Yeongjae Cheon"
     authors: "Minje Park"
-    repositories: {
-      is_official: true
-      url: "https://github.com/sanghoon/pva-faster-rcnn"
-      owner: "sanghoon"
-      framework: FRAMEWORK_OTHERS
-      number_of_stars: 655
-      description: "Demo code for PVANet"
-    }
     repositories: {
       url: "https://github.com/busyboxs/Some-resources-useful-for-me"
       owner: "busyboxs"
@@ -3281,12 +3308,20 @@ pr_id_to_video: {
       owner: "wuyx"
       framework: FRAMEWORK_OTHERS
     }
+    repositories: {
+      is_official: true
+      url: "https://github.com/sanghoon/pva-faster-rcnn"
+      owner: "sanghoon"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 657
+      description: "Demo code for PVANet"
+    }
  }
  video: {
    video_id: "TYDGTnxUGHQ"
    video_title: "PR-033: PVANet: Lightweight Deep Neural Networks for Real-time Object Detection"
    number_of_likes: 25
-    number_of_views: 3390
+    number_of_views: 3414
    published_date: {
      seconds: 1504446966
    }
@@ -3311,69 +3346,66 @@ pr_id_to_video: {
      url: "https://github.com/tensorflow/models/tree/master/research/deeplab"
      owner: "research"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 70578
+      number_of_stars: 70934
      description: "Models and examples built with TensorFlow"
    }
    repositories: {
-      url: "https://github.com/drscotthawley/SPNet"
-      owner: "drscotthawley"
-      framework: FRAMEWORK_TENSORFLOW
+      url: "https://github.com/tanreinama/XceptionHourgrass---PyTorch"
+      owner: "tanreinama"
+      framework: FRAMEWORK_PYTORCH
      number_of_stars: 1
-      description: "Object detection for ESPI images of oscillating steelpan drums"
+      description: "Stacked Hourgrass Network using Xception Blocks."
    }
    repositories: {
-      url: "https://github.com/osmr/imgclsmob"
-      owner: "osmr"
+      url: "https://github.com/LouisFoucard/w-net"
+      owner: "LouisFoucard"
      framework: FRAMEWORK_OTHERS
-      number_of_stars: 2233
-      description: "Sandbox for training deep learning networks"
+      number_of_stars: 200
+      description: "w-net: a convolutional neural network architecture for the self-supervised learning of depthmap from pairs of stereo images."
    }
    repositories: {
-      url: "https://github.com/rwightman/pytorch-image-models"
-      owner: "rwightman"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 11591
-      description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more"
+      url: "https://github.com/crowdAI/crowdai-musical-genre-recognition-starter-kit"
+      owner: "crowdAI"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 53
    }
    repositories: {
-      url: "https://github.com/PaddlePaddle/PaddleClas"
-      owner: "PaddlePaddle"
+      url: "https://github.com/universvm/BacXeption"
+      owner: "universvm"
      framework: FRAMEWORK_OTHERS
-      number_of_stars: 2085
-      description: "A treasure chest for visual recognition powered by PaddlePaddle"
+      number_of_stars: 3
+      description: "Deep Learning Template for bacterial image classification in Keras."
    }
    repositories: {
-      url: "https://github.com/amogh7joshi/engagement-detection"
-      owner: "amogh7joshi"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 6
-      description: "Engagement Detection, including facial detection and emotion recognition, using CNNs/LSTMs."
+      url: "https://github.com/Qinxianshen/tianchi_taobao_2018"
+      owner: "Qinxianshen"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 2
+      description: "阿里天池2018 Keypoints Detection of Apparel-Challenge the Baseline "
    }
    repositories: {
-      url: "https://github.com/amogh7joshi/fer"
-      owner: "amogh7joshi"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 6
-      description: "Engagement Detection, including facial detection and emotion recognition, using CNNs/LSTMs."
+      url: "https://github.com/Candice-X/w-net-for-image-segmentation"
+      owner: "Candice-X"
+      framework: FRAMEWORK_OTHERS
    }
    repositories: {
-      url: "https://github.com/ced-kin/dog-breed-ai"
-      owner: "ced-kin"
-      framework: FRAMEWORK_TENSORFLOW
-      description: "android application for classifying dog breeds"
+      url: "https://github.com/markloyman/LungNoduleRetrieval"
+      owner: "markloyman"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 2
    }
    repositories: {
-      url: "https://github.com/krishnakarthi/COVID-19_Prediction"
-      owner: "krishnakarthi"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 1
-      description: "Diagnose the COVID-19 from patient’s chest X-ray's using Convolution Neural Networks (CNN) Deep Transfer Learning technique in Azure ML workspace"
+      url: "https://github.com/Yohei-Kawakami/Bouquet"
+      owner: "Yohei-Kawakami"
+      framework: FRAMEWORK_OTHERS
+      description: "Auto Generator for Wedding Movie"
    }
    repositories: {
-      url: "https://github.com/bluejurand/Photos-colorization"
-      owner: "bluejurand"
-      framework: FRAMEWORK_TENSORFLOW
-      description: "Keras repository which colorize black-white images."
+      url: "https://github.com/dbensoussan/rsna"
+      owner: "dbensoussan"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 2
+      description: "RSNA Intracranial Hemorrhage Detection - https://www.kaggle.com/c/rsna-intracranial-hemorrhage-detection/overview"
    }
    methods: {
      name: "Pointwise Convolution"
@@ -3429,8 +3461,8 @@ pr_id_to_video: {
  video: {
    video_id: "V0dLhyg5_Dw"
    video_title: "PR-034: Inception and Xception"
-    number_of_likes: 79
-    number_of_views: 10286
+    number_of_likes: 82
+    number_of_views: 10434
    published_date: {
      seconds: 1505052461
    }
@@ -3453,23 +3485,25 @@ pr_id_to_video: {
    authors: "Pang Wei Koh"
    authors: "Percy Liang"
    repositories: {
-      url: "https://github.com/4pygmalion/Federated_learning-filtering-non-influence-data"
-      owner: "4pygmalion"
+      url: "https://github.com/bsharchilev/influence_boosting"
+      owner: "bsharchilev"
      framework: FRAMEWORK_TENSORFLOW
-      description: "Federated learning with influence function"
+      number_of_stars: 53
+      description: "Supporting code for the paper \"Finding Influential Training Samples for Gradient Boosted Decision Trees\""
    }
    repositories: {
-      url: "https://github.com/nimarb/pytorch_influence_functions"
-      owner: "nimarb"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 122
-      description: "This is a PyTorch reimplementation of Influence Functions from the ICML2017 best paper: Understanding Black-box Predictions via Influence Functions by Pang Wei Koh and Percy Liang."
+      url: "https://github.com/ShinKyuY/Understanding-Black-box-Predictions-via-Influence-Functions-tutorial-MNIST-7-vs-1-Classification"
+      owner: "ShinKyuY"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 8
+      description: "Tiny Tutorial on https://arxiv.org/abs/1703.04730"
    }
    repositories: {
-      url: "https://github.com/kohpangwei/influence-release"
-      owner: "kohpangwei"
+      url: "https://github.com/darkonhub/darkon"
+      owner: "darkonhub"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 588
+      number_of_stars: 233
+      description: "Toolkit to Hack Your Deep Learning Models"
    }
    repositories: {
      is_official: true
@@ -3485,45 +3519,43 @@ pr_id_to_video: {
      description: "The repository accompanying the research paper \"Phase Detection with Neural Networks: Interpreting the Black Box\" by A. Dawid, P. Huembeli, M. Tomza, M. Lewenstein, and A. Dauphin"
    }
    repositories: {
-      url: "https://github.com/TooTouch/WhiteBox-Part2"
-      owner: "TooTouch"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 5
-      description: "The White Box Project is a project that introduces many ways to solve the part of the black box of machine learning. This project is based on Interpretable Machine Learning by Christoph Molnar. I recommend you to read the book first and practice this project."
-    }
-    repositories: {
-      url: "https://github.com/bsharchilev/influence_boosting"
-      owner: "bsharchilev"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 54
-      description: "Supporting code for the paper \"Finding Influential Training Samples for Gradient Boosted Decision Trees\""
+      url: "https://github.com/goldin2008/modeling-project"
+      owner: "goldin2008"
+      framework: FRAMEWORK_OTHERS
+      description: "Data Science Modeling Project"
    }
    repositories: {
-      url: "https://github.com/ShinKyuY/Understanding-Black-box-Predictions-via-Influence-Functions-tutorial-MNIST-7-vs-1-Classification"
+      url: "https://github.com/ShinKyuY/Understanding-Black-box-Predictions-via-Influence-Functions-tutorial-MNIST"
      owner: "ShinKyuY"
      framework: FRAMEWORK_OTHERS
      number_of_stars: 8
      description: "Tiny Tutorial on https://arxiv.org/abs/1703.04730"
    }
    repositories: {
-      url: "https://github.com/darkonhub/darkon"
-      owner: "darkonhub"
+      url: "https://github.com/AnonymizedAuthor663/NNIF_adv_defense"
+      owner: "AnonymizedAuthor663"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 233
-      description: "Toolkit to Hack Your Deep Learning Models"
+      description: "NNIF adversarial detection"
    }
    repositories: {
-      url: "https://github.com/goldin2008/modeling-project"
-      owner: "goldin2008"
-      framework: FRAMEWORK_OTHERS
-      description: "Data Science Modeling Project"
+      url: "https://github.com/giladcohen/NNIF_adv_defense"
+      owner: "giladcohen"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 15
+      description: "Detection of adversarial examples using influence functions and nearest neighbors"
+    }
+    repositories: {
+      url: "https://github.com/qmkakaxi/FederatedLearning"
+      owner: "qmkakaxi"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 14
    }
  }
  video: {
    video_id: "xlmlY8WHjkU"
    video_title: "PR-035: Understanding Black-box Predictions via Influence Functions (2017)"
    number_of_likes: 26
-    number_of_views: 3415
+    number_of_views: 3435
    published_date: {
      seconds: 1505051523
    }
@@ -3552,7 +3584,7 @@ pr_id_to_video: {
      url: "https://github.com/tensorflow/models"
      owner: "tensorflow"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 70579
+      number_of_stars: 70934
      description: "Models and examples built with TensorFlow"
    }
  }
  video: {
    video_id: "S_fbBYbXypc"
    video_title: "PR-036: Learning to Remember Rare Events"
    number_of_likes: 7
-    number_of_views: 1491
+    number_of_views: 1496
    published_date: {
      seconds: 1505657142
    }
@@ -3589,20 +3621,6 @@ pr_id_to_video: {
    authors: "Victor Zhong"
    authors: "Romain Paulus"
    authors: "Richard Socher"
-    repositories: {
-      url: "https://github.com/DongjunLee/dmn-tensorflow"
-      owner: "DongjunLee"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 41
-      description: "TensorFlow implementation of 'Ask Me Anything: Dynamic Memory Networks for Natural Language Processing (2015)'"
-    }
-    repositories: {
-      url: "https://github.com/scakc/QAwiki"
-      owner: "scakc"
-      framework: FRAMEWORK_OTHERS
-      number_of_stars: 1
-      description: "Question Answering models that can get an answer from Wikipedia pages and select a sentence subset as a reply to your question."
-    }
    repositories: {
      url: "https://github.com/navodhya/DMN"
      owner: "navodhya"
@@ -3650,6 +3668,20 @@ pr_id_to_video: {
      number_of_stars: 5
      description: "Keras implementation of the dynamic memory networks from https://arxiv.org/pdf/1603.01417.pdf"
    }
+    repositories: {
+      url: "https://github.com/scakc/QAwiki"
+      owner: "scakc"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 1
+      description: "Question Answering models that can get an answer from Wikipedia pages and select a sentence subset as a reply to your question."
+    }
+    repositories: {
+      url: "https://github.com/DongjunLee/dmn-tensorflow"
+      owner: "DongjunLee"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 41
+      description: "TensorFlow implementation of 'Ask Me Anything: Dynamic Memory Networks for Natural Language Processing (2015)'"
+    }
    methods: {
      name: "Softmax"
      full_name: "Softmax"
@@ -3670,7 +3702,7 @@ pr_id_to_video: {
    video_id: "oxSrjuspQEs"
    video_title: "PR-037: Ask me anything: Dynamic memory networks for natural language processing"
    number_of_likes: 24
-    number_of_views: 2364
+    number_of_views: 2373
    published_date: {
      seconds: 1505654553
    }
@@ -3694,75 +3726,78 @@ pr_id_to_video: {
    authors: "Jonathon Shlens"
    authors: "Christian Szegedy"
    repositories: {
-      url: "https://github.com/Jeffkang-94/pytorch-adversarial-attack"
-      owner: "Jeffkang-94"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 3
-      description: "Implementation of gradient-based adversarial attack(FGSM,MI-FGSM,PGD)"
+      url: "https://github.com/sdemyanov/ConvNet"
+      owner: "sdemyanov"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 236
+      description: "Convolutional Neural Networks for Matlab for classification and segmentation, including Invariang Backpropagation (IBP) and Adversarial Training (AT) algorithms. Trained on GPU, require cuDNN v5."
    }
    repositories: {
-      url: "https://github.com/anirudh9784/Adversarial-Defense"
-      owner: "anirudh9784"
+      url: "https://github.com/tensorflow/cleverhans"
+      owner: "tensorflow"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 2
+      number_of_stars: 5218
+      description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both"
    }
    repositories: {
-      url: "https://github.com/anirudh9784/Major_Project"
-      owner: "anirudh9784"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 2
+      url: "https://github.com/locuslab/convex_adversarial"
+      owner: "locuslab"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 301
+      description: "A method for training neural networks that are provably robust to adversarial attacks. "
    }
    repositories: {
-      url: "https://github.com/openai/cleverhans"
-      owner: "openai"
+      url: "https://github.com/drewbarot/Un-CNN"
+      owner: "drewbarot"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 5191
-      description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both"
+      description: "This library modifies an image such that a CNN is unable to classify it. "
    }
    repositories: {
-      url: "https://github.com/cleverhans-lab/cleverhans"
-      owner: "cleverhans-lab"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 5190
-      description: "An adversarial example library for constructing attacks, building defenses, and benchmarking both"
+      url: "https://github.com/cfinlay/tulip"
+      owner: "cfinlay"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 17
+      description: "Scaleable input gradient regularization"
    }
    repositories: {
-      url: "https://github.com/dunky11/adversarial-frontier-stitching"
-      owner: "dunky11"
+      url: "https://github.com/jrguo/FastGradientSignMNIST"
+      owner: "jrguo"
      framework: FRAMEWORK_TENSORFLOW
      number_of_stars: 6
-      description: "Implementation of \"Adversarial Frontier Stitching for Remote Neural Network Watermarking\" in TensorFlow."
+      description: "Basic implementation of FGSM attack on very basic MNIST model."
    }
    repositories: {
-      url: "https://github.com/Jupetus/ExplainableAI"
-      owner: "Jupetus"
+      url: "https://github.com/1Konny/FGSM"
+      owner: "1Konny"
      framework: FRAMEWORK_PYTORCH
-      description: "Collection of ways to explain NN outputs"
+      number_of_stars: 155
+      description: "Simple pytorch implementation of FGSM and I-FGSM"
    }
    repositories: {
-      url: "https://github.com/pwj1996/mycleverhans"
-      owner: "pwj1996"
+      url: "https://github.com/bingcheng45/hnr-extension"
+      owner: "bingcheng45"
      framework: FRAMEWORK_TENSORFLOW
-      description: "修改的cleverhans框架"
+      number_of_stars: 6
+      description: "for Hack n Roll NUS 2019"
    }
    repositories: {
-      url: "https://github.com/SifatMd/Research-Papers"
-      owner: "SifatMd"
+      url: "https://github.com/alfrei/ml_blitz_2018"
+      owner: "alfrei"
      framework: FRAMEWORK_OTHERS
+      number_of_stars: 1
+      description: "Solutions for Yandex (Яндекс) ML blitz 2018: https://contest.yandex.ru/contest/8470"
    }
    repositories: {
-      url: "https://github.com/axelbrando/Mixture-Density-Networks-for-distribution-and-uncertainty-estimation"
-      owner: "axelbrando"
+      url: "https://github.com/KeAWang/BayesianGAN4AdversarialAttacks"
+      owner: "KeAWang"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 261
-      description: "A generic Mixture Density Networks (MDN) implementation for distribution and uncertainty estimation by using Keras (TensorFlow)"
    }
  }
  video: {
    video_id: "7hRO2bS810M"
    video_title: "PR-038: Explaining and Harnessing Adversarial Examples"
    number_of_likes: 7
-    number_of_views: 1548
+    number_of_views: 1558
    published_date: {
      seconds: 1507170279
    }
@@ -3785,67 +3820,72 @@ pr_id_to_video: {
    authors: "Yarin Gal"
    authors: "Zoubin Ghahramani"
    repositories: {
-      url: "https://github.com/cdebeunne/uncertainties_CNN"
-      owner: "cdebeunne"
+      url: "https://github.com/marcovirgolin/UncertaintyEstimationInDeepNets"
+      owner: "marcovirgolin"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 2
+      description: "Attempt to reproduce the toy experiment of http://bit.ly/2C9Z8St with an ensemble of nets and with dropout."
+    }
+    repositories: {
+      url: "https://github.com/jelleman8/TractSeg"
+      owner: "jelleman8"
      framework: FRAMEWORK_PYTORCH
      number_of_stars: 1
-      description: "A repo for toy examples to test uncertainties estimation of neural networks"
    }
    repositories: {
-      url: "https://github.com/asharakeh/probdet"
-      owner: "asharakeh"
+      url: "https://github.com/MIC-DKFZ/TractSeg"
+      owner: "MIC-DKFZ"
      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 30
-      description: "Code for \"Estimating and Evaluating Regression Predictive Uncertainty in Deep Object Detectors.\" (ICLR 2021)"
+      number_of_stars: 116
+      description: "Automatic White Matter Bundle Segmentation"
    }
    repositories: {
-      url: "https://github.com/erickgalinkin/dropout_privacy"
-      owner: "erickgalinkin"
+      url: "https://github.com/mrahtz/learning-from-human-preferences"
+      owner: "mrahtz"
      framework: FRAMEWORK_TENSORFLOW
-      description: "Project repository for Drexel CS590 "
+      number_of_stars: 169
+      description: "Reproduction of OpenAI and DeepMind's \"Deep Reinforcement Learning from Human Preferences\""
    }
    repositories: {
-      url: "https://github.com/MayarLotfy/bayesianNN"
-      owner: "MayarLotfy"
-      framework: FRAMEWORK_PYTORCH
-    }
-    repositories: {
-      url: "https://github.com/arodriguezca/uncertainty-ts-forecasting"
-      owner: "arodriguezca"
-      framework: FRAMEWORK_PYTORCH
+      url: "https://github.com/kosyoshida/simple-keras"
+      owner: "kosyoshida"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 1
    }
    repositories: {
-      url: "https://github.com/aredier/monte_carlo_dropout"
-      owner: "aredier"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 2
-      description: "using monte carlo dropout to have uncertainty estimation of predictions"
+      url: "https://github.com/arneschmidt/bayesian_deep_learning"
+      owner: "arneschmidt"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 3
+      description: "Gaussian Processes and Bayesian Neural Networks"
    }
    repositories: {
-      url: "https://github.com/agnesdeng/misle"
-      owner: "agnesdeng"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 3
-      description: "Multiple imputation through statistical learning"
+      url: "https://github.com/omacshane/basicbayesDL"
+      owner: "omacshane"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 6
+      description: "Basic Bayesian Deep Learning"
    }
    repositories: {
-      url: "https://github.com/gtegner/hyper-gan"
-      owner: "gtegner"
+      url: "https://github.com/cdebeunne/uncertainties_CNN"
+      owner: "cdebeunne"
      framework: FRAMEWORK_PYTORCH
-      description: "Uncertainty Estimation with HyperGANS in PyTorch!"
+      number_of_stars: 1
+      description: "A repo for toy examples to test uncertainties estimation of neural networks"
    }
    repositories: {
-      url: "https://github.com/marcovirgolin/UncertaintyEstimationInDeepNets"
-      owner: "marcovirgolin"
-      framework: FRAMEWORK_OTHERS
-      number_of_stars: 2
-      description: "Attempt to reproduce the toy experiment of http://bit.ly/2C9Z8St with an ensemble of nets and with dropout."
+      url: "https://github.com/kenya-sk/mc_dropout_tensorflow"
+      owner: "kenya-sk"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 9
+      description: "This repository reimplemented \"MC Dropout\" by tensorflow 2.0 Eager Extension."
    }
    repositories: {
-      url: "https://github.com/jelleman8/TractSeg"
-      owner: "jelleman8"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 1
+      url: "https://github.com/yaringal/DropoutUncertaintyExps"
+      owner: "yaringal"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 390
+      description: "Experiments used in \"Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning\""
    }
    methods: {
      name: "Monte Carlo Dropout"
@@ -3860,8 +3900,8 @@ pr_id_to_video: {
  video: {
    video_id: "aU91bDGmy7I"
    video_title: "PR-039: Dropout as a Bayesian approximation"
-    number_of_likes: 58
-    number_of_views: 5150
+    number_of_likes: 60
+    number_of_views: 5226
    published_date: {
      seconds: 1508076910
    }
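[Editor's aside — illustrative only, not part of the diff.] PR-039 ("Dropout as a Bayesian approximation") and the repositories above revolve around Monte Carlo dropout: keep dropout active at inference time, run several stochastic forward passes, and read predictive uncertainty off the spread of the outputs. A minimal Go sketch under that assumption, with a hypothetical one-weight `stochasticPass` standing in for a real network:

```go
package main

import (
	"fmt"
	"math"
	"math/rand"
)

// stochasticPass is one forward pass with dropout left ON at test time:
// the single "unit" is dropped with probability p, mirroring the
// train-time behaviour described in the Dropout method note.
func stochasticPass(x, w, p float64, rng *rand.Rand) float64 {
	if rng.Float64() < p {
		return 0 // unit dropped for this pass
	}
	return w * x
}

// mcDropout runs T stochastic passes and returns the predictive mean and
// variance; the variance is the Monte Carlo dropout uncertainty signal.
func mcDropout(x, w, p float64, T int) (mean, variance float64) {
	rng := rand.New(rand.NewSource(42))
	samples := make([]float64, T)
	for i := range samples {
		samples[i] = stochasticPass(x, w, p, rng)
		mean += samples[i]
	}
	mean /= float64(T)
	for _, s := range samples {
		variance += (s - mean) * (s - mean)
	}
	variance /= float64(T)
	return mean, variance
}

func main() {
	// Expected mean ≈ (1-p)*w*x = 0.5, the usual scaled-weights prediction;
	// the spread around it is what a single deterministic pass cannot give.
	mean, v := mcDropout(2.0, 0.5, 0.5, 10000)
	fmt.Printf("prediction %.3f ± %.3f\n", mean, math.Sqrt(v))
}
```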
@@ -3891,24 +3931,23 @@ pr_id_to_video: {
    authors: "Andrew Senior"
    authors: "Koray Kavukcuoglu"
    repositories: {
-      url: "https://github.com/pbrandl/aNN_Audio"
-      owner: "pbrandl"
-      framework: FRAMEWORK_PYTORCH
-      description: "Digital twin of analog audio distortion devices (WavNet based)."
+      url: "https://github.com/peustr/wavenet"
+      owner: "peustr"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 15
+      description: "Basic implementation of the WaveNet as described in the paper published by DeepMind"
    }
    repositories: {
-      url: "https://github.com/ibab/tensorflow-wavenet"
-      owner: "ibab"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 5129
-      description: "A TensorFlow implementation of DeepMind's WaveNet paper"
+      url: "https://github.com/basameera/NIPS_week_4_5"
+      owner: "basameera"
+      framework: FRAMEWORK_OTHERS
+      description: "Neural Imformation Processing Systems"
    }
    repositories: {
-      url: "https://github.com/otosense/slang"
-      owner: "otosense"
-      framework: FRAMEWORK_OTHERS
-      number_of_stars: 3
-      description: "A light weight version of Slang: Tools to build a language of sound."
+      url: "https://github.com/ShotDownDiane/tcn-master"
+      owner: "ShotDownDiane"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 2
    }
    repositories: {
      url: "https://github.com/isadrtdinov/wavenet"
@@ -3918,44 +3957,41 @@ pr_id_to_video: {
      description: "WaveNet vocoder implementation for speech synthesis task"
    }
    repositories: {
-      url: "https://github.com/AI-Huang/WaveNet"
-      owner: "AI-Huang"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 3
-      description: "Keras and PyTorch implementations for Google's WaveNet"
+      url: "https://github.com/ZTianle/keras-tcn-solar"
+      owner: "ZTianle"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 1
    }
    repositories: {
-      url: "https://github.com/stdereka/liverpool-ion-switching"
-      owner: "stdereka"
+      url: "https://github.com/PhilippeNguyen/keras_wavenet"
+      owner: "PhilippeNguyen"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 13
-      description: "Liverpool Ion Switching kaggle competition 2nd place winning solution"
+      number_of_stars: 8
+      description: "keras implementation of wavenet/parallel wavenet"
    }
    repositories: {
-      url: "https://github.com/pascalbakker/WaveNet-Implementation"
-      owner: "pascalbakker"
+      url: "https://github.com/adityaagrawal7/speech-to-text-wavenet"
+      owner: "adityaagrawal7"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 1
-      description: "Tensorflow implementation of Wavenet architecture "
    }
    repositories: {
-      url: "https://github.com/randomrandom/deep-atrous-cnn-sentiment"
-      owner: "randomrandom"
+      url: "https://github.com/scpark20/universal-music-translation"
+      owner: "scpark20"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 65
-      description: "Deep-Atrous-CNN-Text-Network: End-to-end word level model for sentiment analysis and other text classifications"
+      number_of_stars: 26
+      description: "A Universal Music Translation Network Implementation"
    }
    repositories: {
-      url: "https://github.com/sriharireddypusapati/speech-to-text-wavenet2"
-      owner: "sriharireddypusapati"
+      url: "https://github.com/liguigui/speech-to-text-wavenet"
+      owner: "liguigui"
      framework: FRAMEWORK_TENSORFLOW
    }
    repositories: {
-      url: "https://github.com/thorwhalen/slang"
-      owner: "thorwhalen"
-      framework: FRAMEWORK_OTHERS
-      number_of_stars: 3
-      description: "A light weight version of Slang: Tools to build a language of sound."
+      url: "https://github.com/TanUkkii007/wavenet"
+      owner: "TanUkkii007"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 6
+      description: " An implementation of WaveNet: A Generative Model for Raw Audio https://arxiv.org/abs/1609.03499"
    }
    methods: {
      name: "WaveNet"
@@ -3982,7 +4018,7 @@ pr_id_to_video: {
    video_id: "GyQnex_DK2k"
    video_title: "PR-040: WaveNet - A Generative Model for Raw Audio"
    number_of_likes: 64
-    number_of_views: 7190
+    number_of_views: 7234
    published_date: {
      seconds: 1508077701
    }
@@ -4007,77 +4043,75 @@ pr_id_to_video: {
    authors: "Samy Bengio"
    authors: "Dumitru Erhan"
    repositories: {
-      url: "https://github.com/supreethub/Image-Captioning"
-      owner: "supreethub"
+      url: "https://github.com/Data-drone/cvnd_image_captioning"
+      owner: "Data-drone"
      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 1
-      description: "A complete pipeline of Convolutional Neural Networks (CNN) and Recurrent Neural Networks (RNN) knowledge to build a deep learning model that produces captions given an input image."
+      description: "Udacity Computer Vision Nanodegree Project 2"
    }
    repositories: {
-      url: "https://github.com/jelifysh/Image-Captioning"
-      owner: "jelifysh"
+      url: "https://github.com/hx19940102/Image-Captioning"
+      owner: "hx19940102"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 17
-      description: "Implemented 3 different architectures to tackle the Image Caption problem, i.e, Merged Encoder-Decoder - Bahdanau Attention - Transformers"
+      description: "Image captioning implementation based on Vinyals et al.'s paper."
    }
    repositories: {
-      url: "https://github.com/juletx/image-caption-generation"
-      owner: "juletx"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 2
-      description: "Automatic Image Caption Generation model that uses a CNN to condition a LSTM based language model"
+      url: "https://github.com/Ohara124c41/CVND-Image_Captioning"
+      owner: "Ohara124c41"
+      framework: FRAMEWORK_PYTORCH
+      description: "Computer Vision Nanodegree image captioning project"
    }
    repositories: {
-      url: "https://github.com/Djmcflush/Quantum-Hackathon"
-      owner: "Djmcflush"
+      url: "https://github.com/nishimehta/Image_Captioning"
+      owner: "nishimehta"
      framework: FRAMEWORK_TENSORFLOW
    }
    repositories: {
-      url: "https://github.com/sd2001/Image2Caption"
-      owner: "sd2001"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 8
-      description: "🎥Image2Caption🔤: Upload an image and let the model generate a caption for you🤖."
+      url: "https://github.com/SathwikTejaswi/Neural-Image-Captioning"
+      owner: "SathwikTejaswi"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 9
+      description: "This is an implementation of the paper \"Show and Tell: A Neural Image Caption Generator\"."
    }
    repositories: {
-      url: "https://github.com/sd2001/Auto-Image2Caption"
-      owner: "sd2001"
+      url: "https://github.com/jazzsaxmafia/show_and_tell.tensorflow"
+      owner: "jazzsaxmafia"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 8
-      description: "🎥Image2Caption🔤: Upload an image and let the model generate a caption for you🤖."
+      number_of_stars: 292
    }
    repositories: {
-      url: "https://github.com/Tamim-MR14/Image_Caption_Generator"
-      owner: "Tamim-MR14"
-      framework: FRAMEWORK_PYTORCH
-      description: "Project Done as a part of requirements of Graduation of Udacity computer Vision Nanodegree"
+      url: "https://github.com/yashk2810/Image-Captioning"
+      owner: "yashk2810"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 298
+      description: "Image Captioning using InceptionV3 and beam search"
    }
    repositories: {
-      url: "https://github.com/simnyatsanga/image-caption-generator"
-      owner: "simnyatsanga"
-      framework: FRAMEWORK_TENSORFLOW
+      url: "https://github.com/longjj/Caffe-SGDR"
+      owner: "longjj"
+      framework: FRAMEWORK_OTHERS
      number_of_stars: 2
-      description: "Image Caption Generators in TensorFlow and Keras"
+      description: "Caffe implementation of SGDR"
    }
    repositories: {
-      url: "https://github.com/neerav47/Image-Captioning"
-      owner: "neerav47"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 7
-      description: "Complete pipeline to predict captions for a given image."
+      url: "https://github.com/samim23/NeuralTalkAnimator"
+      owner: "samim23"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 93
+      description: "Describing Videos with Neural Networks"
    }
    repositories: {
-      url: "https://github.com/atharv6/Image-Captioning"
-      owner: "atharv6"
+      url: "https://github.com/nalbert9/Image-Captioning"
+      owner: "nalbert9"
      framework: FRAMEWORK_PYTORCH
-      description: "Generating Captions from Images"
+      number_of_stars: 10
+      description: "Computer Vision: Generate captions that describe the contents of images using PyTorch"
    }
  }
  video: {
    video_id: "BrmCnoYhQb4"
    video_title: "PR-041: Show and Tell: A Neural Image Caption Generator"
-    number_of_likes: 26
-    number_of_views: 4520
+    number_of_likes: 27
+    number_of_views: 4538
    published_date: {
      seconds: 1508678893
    }
@@ -4100,72 +4134,69 @@ pr_id_to_video: {
    authors: "Diederik P. Kingma"
    authors: "Jimmy Ba"
    repositories: {
-      url: "https://github.com/MalayAgr/DeepNeuralNetwork-Scratch"
-      owner: "MalayAgr"
+      url: "https://github.com/SamuelMarks/doctrans"
+      owner: "SamuelMarks"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 1
-      description: "Different kinds of deep neural networks (DNNs) implemented from scratch using Python and NumPy, with a TensorFlow-like object-oriented API. "
+      number_of_stars: 4
+      description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy."
    }
    repositories: {
-      url: "https://github.com/vanyle/vlearn"
-      owner: "vanyle"
-      framework: FRAMEWORK_TENSORFLOW
+      url: "https://github.com/kkrajczar/DSGeneric"
+      owner: "kkrajczar"
+      framework: FRAMEWORK_OTHERS
      number_of_stars: 1
-      description: "A machine learning framework written in C++ designed for distributed computing "
    }
    repositories: {
-      url: "https://github.com/joseluis1061/neuralnilm"
-      owner: "joseluis1061"
+      url: "https://github.com/mohamedameen93/German-Traffic-Sign-Classification-Using-TensorFlow"
+      owner: "mohamedameen93"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 3
-      description: "Neural nilm python 3.3"
+      number_of_stars: 51
+      description: "In this project, I used Python and TensorFlow to classify traffic signs. Dataset used: German Traffic Sign Dataset. This dataset has more than 50,000 images of 43 classes. I was able to reach a +99% validation accuracy, and a 97.3% testing accuracy."
    }
    repositories: {
-      url: "https://github.com/chuiyunjun/projectCSC413"
-      owner: "chuiyunjun"
+      url: "https://github.com/Anunay1234/Sentiment-Analysis-using-LSTM"
+      owner: "Anunay1234"
      framework: FRAMEWORK_OTHERS
    }
    repositories: {
-      url: "https://github.com/nnaisense/pgpelib"
-      owner: "nnaisense"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 22
-      description: "A mini library for Policy Gradients with Parameter-based Exploration, with reference implementation of the ClipUp optimizer (https://arxiv.org/abs/2008.02387) from NNAISENSE."
+      url: "https://github.com/jpjuvo/64-3D-RaSGAN"
+      owner: "jpjuvo"
+      framework: FRAMEWORK_TENSORFLOW
+      number_of_stars: 29
+      description: "RaSGAN 3D object generation"
    }
    repositories: {
-      url: "https://github.com/lab-ml/nn/tree/master/labml_nn/optimizers"
-      owner: "labml_nn"
+      url: "https://github.com/TimRoith/BregmanLearning"
+      owner: "TimRoith"
      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 3213
-      description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc."
+      number_of_stars: 1
+      description: "Optimizing neural networks via an inverse scale space flow."
    }
    repositories: {
-      url: "https://github.com/offscale/cdd-python"
-      owner: "offscale"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 4
-      description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy."
+      url: "https://github.com/hsvgbkhgbv/TACTHMC"
+      owner: "hsvgbkhgbv"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 6
+      description: "Thermostat-assisted continuously-tempered Hamiltonian Monte Carlo for Bayesian learning"
    }
    repositories: {
-      url: "https://github.com/SamuelMarks/doctrans"
-      owner: "SamuelMarks"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 4
-      description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy."
+      url: "https://github.com/zhuchen03/mva"
+      owner: "zhuchen03"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 7
    }
    repositories: {
-      url: "https://github.com/safakkbilici/Academic-Paper-Title-Recommendation"
-      owner: "safakkbilici"
+      url: "https://github.com/Intel-bigdata/imllib-spark"
+      owner: "Intel-bigdata"
      framework: FRAMEWORK_OTHERS
-      number_of_stars: 13
-      description: "Supervised text summarization (title generation/recommendation) based on academic paper abstracts, with Seq2Seq LSTM and the power of Transfer Learning and T5."
+      number_of_stars: 158
    }
    repositories: {
-      url: "https://github.com/JaneliaSciComp/SongExplorer"
-      owner: "JaneliaSciComp"
-      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 4
-      description: "deep learning for acoustic signals"
+      url: "https://github.com/hsvgbkhgbv/Thermostat-assisted-continuously-tempered-Hamiltonian-Monte-Carlo-for-Bayesian-learning"
+      owner: "hsvgbkhgbv"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 6
+      description: "Thermostat-assisted continuously-tempered Hamiltonian Monte Carlo for Bayesian learning"
    }
    methods: {
      name: "Adam"
@@ -4181,8 +4212,8 @@ pr_id_to_video: {
  video: {
    video_id: "KN120w3PZIA"
    video_title: "PR-042: Adam: A Method for Stochastic Optimization"
-    number_of_likes: 39
-    number_of_views: 4186
+    number_of_likes: 40
+    number_of_views: 4201
    published_date: {
      seconds: 1508682336
    }
+ url: "https://github.com/jpjuvo/64-3D-RaSGAN" + owner: "jpjuvo" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 29 + description: "RaSGAN 3D object generation" } repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/optimizers" - owner: "labml_nn" + url: "https://github.com/TimRoith/BregmanLearning" + owner: "TimRoith" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + number_of_stars: 1 + description: "Optimizing neural networks via an inverse scale space flow." } repositories: { - url: "https://github.com/offscale/cdd-python" - owner: "offscale" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy." + url: "https://github.com/hsvgbkhgbv/TACTHMC" + owner: "hsvgbkhgbv" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Thermostat-assisted continuously-tempered Hamiltonian Monte Carlo for Bayesian learning" } repositories: { - url: "https://github.com/SamuelMarks/doctrans" - owner: "SamuelMarks" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Open API to/fro routes, models, and tests. Convert between docstrings, classes, methods, argparse, and SQLalchemy." + url: "https://github.com/zhuchen03/mva" + owner: "zhuchen03" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 } repositories: { - url: "https://github.com/safakkbilici/Academic-Paper-Title-Recommendation" - owner: "safakkbilici" + url: "https://github.com/Intel-bigdata/imllib-spark" + owner: "Intel-bigdata" framework: FRAMEWORK_OTHERS - number_of_stars: 13 - description: "Supervised text summarization (title generation/recommendation) based on academic paper abstracts, with Seq2Seq LSTM and the power of Transfer Learning and T5." + number_of_stars: 158 } repositories: { - url: "https://github.com/JaneliaSciComp/SongExplorer" - owner: "JaneliaSciComp" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "deep learning for acoustic signals" + url: "https://github.com/hsvgbkhgbv/Thermostat-assisted-continuously-tempered-Hamiltonian-Monte-Carlo-for-Bayesian-learning" + owner: "hsvgbkhgbv" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Thermostat-assisted continuously-tempered Hamiltonian Monte Carlo for Bayesian learning" } methods: { name: "Adam" @@ -4181,8 +4212,8 @@ pr_id_to_video: { video: { video_id: "KN120w3PZIA" video_title: "PR-042: Adam: A Method for Stochastic Optimization" - number_of_likes: 39 - number_of_views: 4186 + number_of_likes: 40 + number_of_views: 4201 published_date: { seconds: 1508682336 } @@ -4206,19 +4237,23 @@ pr_id_to_video: { authors: "Andrew Dai" authors: "Quoc V. 
Le" repositories: { - is_official: true - url: "https://github.com/hardmaru/supercell" - owner: "hardmaru" + url: "https://github.com/g1910/HyperNetworks" + owner: "g1910" + framework: FRAMEWORK_PYTORCH + number_of_stars: 121 + description: "PyTorch implementation of HyperNetworks (Ha et al., ICLR 2017) for ResNet (Residual Networks)" + } + repositories: { + url: "https://github.com/gahaalt/continual-learning-overview" + owner: "gahaalt" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 183 - description: "supercell" + number_of_stars: 5 } repositories: { - url: "https://github.com/lab-ml/nn" - owner: "lab-ml" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3215 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + url: "https://github.com/gahaalt/continual-learning-with-hypernets" + owner: "gahaalt" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 } repositories: { url: "https://github.com/gtegner/hyper-gan" @@ -4227,23 +4262,19 @@ pr_id_to_video: { description: "Uncertainty Estimation with HyperGANS in PyTorch!" } repositories: { - url: "https://github.com/gahaalt/continual-learning-with-hypernets" - owner: "gahaalt" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - } - repositories: { - url: "https://github.com/g1910/HyperNetworks" - owner: "g1910" + url: "https://github.com/lab-ml/nn" + owner: "lab-ml" framework: FRAMEWORK_PYTORCH - number_of_stars: 115 - description: "PyTorch implementation of HyperNetworks (Ha et al., ICLR 2017) for ResNet (Residual Networks)" + number_of_stars: 3485 + description: "🧑‍🏫 Implementations/tutorials of deep learning papers with side-by-side notes 📝; including transformers (original, xl, switch, feedback, vit), optimizers (adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), 🎮 reinforcement learning (ppo, dqn), capsnet, distillation, etc. 
🧠" } repositories: { - url: "https://github.com/gahaalt/continual-learning-overview" - owner: "gahaalt" + is_official: true + url: "https://github.com/hardmaru/supercell" + owner: "hardmaru" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 + number_of_stars: 183 + description: "supercell" } methods: { name: "HyperNetwork" @@ -4255,7 +4286,7 @@ pr_id_to_video: { video_id: "-tUQXSdEsMk" video_title: "PR-043: HyperNetworks" number_of_likes: 13 - number_of_views: 1710 + number_of_views: 1737 published_date: { seconds: 1509287449 } @@ -4284,71 +4315,69 @@ pr_id_to_video: { authors: "Marco Andreetto" authors: "Hartwig Adam" repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" + url: "https://github.com/ShivamPrajapati2001/People_Counter" + owner: "ShivamPrajapati2001" framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } - repositories: { - url: "https://github.com/drscotthawley/SPNet" - owner: "drscotthawley" - framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 - description: "Object detection for ESPI images of oscillating steelpan drums" + description: "This is Real Time People Counting using OpenCV" } repositories: { - url: "https://github.com/prasadji/Flower-Classifaction-with-Fine-Tuned-Mobilenet" - owner: "prasadji" + url: "https://github.com/tensorflow/models/tree/master/research/slim" + owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + number_of_stars: 70934 + description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" - framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" + url: "https://github.com/emilianavt/OpenSeeFace" + owner: "emilianavt" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 387 + description: "Robust realtime face and facial landmark tracking on CPU with Unity integration" } repositories: { - url: "https://github.com/akrapukhin/MobileNetV3" - owner: "akrapukhin" + url: "https://github.com/ruinmessi/RFBNet" + owner: "ruinmessi" framework: FRAMEWORK_PYTORCH - description: "An implementation of the MobileNetV3 models in Pytorch with scripts for training, testing and measuring latency." + number_of_stars: 1299 + description: "Receptive Field Block Net for Accurate and Fast Object Detection, ECCV 2018" } repositories: { - url: "https://github.com/rsreetech/MultiModalSearch" - owner: "rsreetech" + url: "https://github.com/TurtleGo/project-vehicle-detect" + owner: "TurtleGo" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "In this repository I demonstrate how you can perform multimodal(image+text) search to find similar images+texts given a test image+text from a multimodal (texts+images) database . I use the Kaggle Shopee dataset. I use Tensorflow MobileNet CNN and hugging face sentence transformers BERT to extract image and text embeddings to create a joint embedding search space. 
Given an image and it text description I extract joint embedding and then use nearest neighbours algorithm to find top 5 similar images+texts description from my joint embedding search space" + description: "project-vehicle-detect" } repositories: { - url: "https://github.com/Video-Streaming-Pipeline/Video-Streaming-Pipeline" - owner: "Video-Streaming-Pipeline" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "실시간 이미지 처리 모델을 위한 모바일, 클라우드 영상 전송 파이프라인 개발" + url: "https://github.com/cftang0827/pedestrian_recognition" + owner: "cftang0827" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 29 + description: "A simple human recognition api for re-ID usage, power by paper https://arxiv.org/abs/1703.07737" } repositories: { - url: "https://github.com/SalvadorAlbarran/TFG2020" - owner: "SalvadorAlbarran" + url: "https://github.com/Tsejing/object_detection" + owner: "Tsejing" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Aceleración de AI en dispositivos de bajo consumo" + description: "利用slim框架和object_detection框架,搭建一个目标检测模型,预训练模型为ssd_mobilenet_v1。" } repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - owner: "lpirola13" + url: "https://github.com/pessimiss/ai100-w8-master" + owner: "pessimiss" framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." } repositories: { - url: "https://github.com/Rishit-dagli/Greenathon-Plant-AI" - owner: "Rishit-dagli" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 27 - description: "Identify Diseases in Plants☘️ with Machine Learning on the web using TFJS" + url: "https://github.com/BIG-CHENG/FaceRec" + owner: "BIG-CHENG" + framework: FRAMEWORK_OTHERS + description: "Face Recognition System" + } + repositories: { + url: "https://github.com/Edmonton-School-of-AI/ml5-Simple-Image-Classification" + owner: "Edmonton-School-of-AI" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "ml5 - Simple Image Classification using MobileNet" } methods: { name: "Average Pooling" @@ -4404,8 +4433,8 @@ pr_id_to_video: { video: { video_id: "7UoOFKcyIvM" video_title: "PR-044: MobileNet" - number_of_likes: 140 - number_of_views: 14903 + number_of_likes: 145 + number_of_views: 15128 published_date: { seconds: 1509456696 } @@ -4430,71 +4459,67 @@ pr_id_to_video: { authors: "Iasonas Kokkinos" authors: "Kevin Murphy" authors: "Alan L. Yuille" - repositories: { - url: "https://github.com/halbielee/EPS" - owner: "halbielee" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "Official PyTorch implementation of \"Railroad is not a Train: Saliency as Pseudo-pixel Supervision for Weakly Supervised Semantic Segmentation\", CVPR2021" - } repositories: { url: "https://github.com/tensorflow/models/tree/master/research/deeplab" owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/CompVis/taming-transformers" - owner: "CompVis" + url: "https://github.com/switchablenorms/SwitchNorm_Segmentation" + owner: "switchablenorms" framework: FRAMEWORK_PYTORCH - number_of_stars: 1610 - description: "Taming Transformers for High-Resolution Image Synthesis" + number_of_stars: 48 + description: "Switchable Normalization for semantic image segmentation and scene parsing." 
} repositories: { - url: "https://github.com/johnnylu305/Simple-does-it-weakly-supervised-instance-and-semantic-segmentation" - owner: "johnnylu305" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 43 - description: "Weakly Supervised Segmentation by Tensorflow. Implements semantic segmentation in Simple Does It: Weakly Supervised Instance and Semantic Segmentation, by Khoreva et al. (CVPR 2017)." + url: "https://github.com/yaq007/Autofocus-Layer" + owner: "yaq007" + framework: FRAMEWORK_PYTORCH + number_of_stars: 173 + description: "Autofocus Layer for Semantic Segmentation" } repositories: { - url: "https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/configs/deeplabv3" - owner: "configs" - framework: FRAMEWORK_OTHERS - number_of_stars: 1742 - description: "End-to-end image segmentation kit based on PaddlePaddle. " + url: "https://github.com/kazuto1011/deeplab-pytorch" + owner: "kazuto1011" + framework: FRAMEWORK_PYTORCH + number_of_stars: 862 + description: "PyTorch implementation of DeepLab v2 on COCO-Stuff / PASCAL VOC" } repositories: { - url: "https://github.com/NASA-NeMO-Net/NeMO-Net" - owner: "NASA-NeMO-Net" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + url: "https://github.com/violin0847/crowdcounting" + owner: "violin0847" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/leimao/DeepLab-V3" - owner: "leimao" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 80 - description: "Google DeepLab V3 for Image Semantic Segmentation" + url: "https://github.com/warmspringwinds/pytorch-segmentation-detection" + owner: "warmspringwinds" + framework: FRAMEWORK_PYTORCH + number_of_stars: 702 + description: "Image Segmentation and Object Detection in Pytorch" } repositories: { - url: "https://github.com/kuangbixia/DeepLab" - owner: "kuangbixia" + url: "https://github.com/RituYadav92/Image-segmentation" + owner: "RituYadav92" framework: FRAMEWORK_PYTORCH - description: "Backup the source codes I learned and modified." 
} repositories: { - url: "https://github.com/Media-Smart/vedaseg" - owner: "Media-Smart" + url: "https://github.com/Lxrd-AJ/Advanced_ML" + owner: "Lxrd-AJ" framework: FRAMEWORK_PYTORCH - number_of_stars: 381 - description: "A semantic segmentation toolbox based on PyTorch" } repositories: { - url: "https://github.com/Popcorn-sugar/Deep_v2" - owner: "Popcorn-sugar" + url: "https://github.com/purushothamgowthu/deep-photo-styletransfer" + owner: "purushothamgowthu" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/leimao/DeepLab_v3" + owner: "leimao" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 80 + description: "Google DeepLab V3 for Image Semantic Segmentation" } methods: { name: "Convolution" @@ -4550,8 +4575,8 @@ pr_id_to_video: { video: { video_id: "JiC78rUF4iI" video_title: "PR-045: DeepLab: Semantic Image Segmentation" - number_of_likes: 97 - number_of_views: 8811 + number_of_likes: 100 + number_of_views: 8934 published_date: { seconds: 1509896571 } @@ -4579,18 +4604,19 @@ pr_id_to_video: { authors: "Leonidas Guibas" authors: "Jascha Sohl-Dickstein" repositories: { - url: "https://github.com/bcaitech1/p4-dkt-no_caffeine_no_gain" - owner: "bcaitech1" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "No-Caffeine-No-Gain's Deep Knowledge Tracing (DKT)" + is_official: true + url: "https://github.com/chrispiech/DeepKnowledgeTracing" + owner: "chrispiech" + framework: FRAMEWORK_OTHERS + number_of_stars: 187 + description: "source code for the paper Deep Knowledge Tracing" } repositories: { - url: "https://github.com/YangZhouEdu/DKT_pytorch" - owner: "YangZhouEdu" + url: "https://github.com/jarviszhb/KnowledgeTracing" + owner: "jarviszhb" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Deep Knowledge Tracing by Pytorch" + number_of_stars: 11 + description: "Some implementations of knowledge tracing with pytorch" } repositories: { url: "https://github.com/jdxyw/deepKT" @@ -4600,19 +4626,18 @@ pr_id_to_video: { description: "A repo for knowledge tracing implementation by PyTorch" } repositories: { - is_official: true - url: "https://github.com/chrispiech/DeepKnowledgeTracing" - owner: "chrispiech" - framework: FRAMEWORK_OTHERS - number_of_stars: 186 - description: "source code for the paper Deep Knowledge Tracing" + url: "https://github.com/bcaitech1/p4-dkt-no_caffeine_no_gain" + owner: "bcaitech1" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "No-Caffeine-No-Gain's Deep Knowledge Tracing (DKT)" } repositories: { - url: "https://github.com/jarviszhb/KnowledgeTracing" - owner: "jarviszhb" + url: "https://github.com/YangZhouEdu/DKT_pytorch" + owner: "YangZhouEdu" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Some implementations of knowledge tracing with pytorch" + number_of_stars: 4 + description: "Deep Knowledge Tracing by Pytorch" } methods: { name: "LINE" @@ -4623,7 +4648,7 @@ pr_id_to_video: { video: { video_id: "8hdY6Jns5-k" video_title: "PR-046: Deep Knowledge Tracing" - number_of_views: 2038 + number_of_views: 2064 published_date: { seconds: 1509893052 } @@ -4649,67 +4674,69 @@ pr_id_to_video: { authors: "Aude Oliva" authors: "Antonio Torralba" repositories: { - url: "https://github.com/zhoubolei/CAM" - owner: "zhoubolei" + url: "https://github.com/metalbubble/CAM" + owner: "metalbubble" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1508 + number_of_stars: 1520 description: "Class Activation Mapping" } repositories: { - url: 
"https://github.com/FrancescoSaverioZuppichini/A-journey-into-Convolutional-Neural-Network-visualization-" - owner: "FrancescoSaverioZuppichini" - framework: FRAMEWORK_PYTORCH - number_of_stars: 188 - description: "A journey into Convolutional Neural Network visualization " - } - repositories: { - url: "https://github.com/frgfm/torch-cam" - owner: "frgfm" - framework: FRAMEWORK_PYTORCH - number_of_stars: 381 - description: "Class activation maps for your PyTorch models (CAM, Grad-CAM, Grad-CAM++, Smooth Grad-CAM++, Score-CAM, SS-CAM, IS-CAM, XGrad-CAM, Layer-CAM)" + url: "https://github.com/pasrichashivam/Convnets_Grad-CAM_Grad-CAM-PlusPlus_Keras_tensorflow-2" + owner: "pasrichashivam" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Explainable AI: For Computer Vision Tasks Using CAM, GradCAM & GradCAM++" } repositories: { - url: "https://github.com/HRanWang/Spatial-Re-Scaling" + url: "https://github.com/HRanWang/Spatial-Attention" owner: "HRanWang" framework: FRAMEWORK_PYTORCH number_of_stars: 129 } repositories: { - url: "https://github.com/vlue-c/PyTorch-Explanations" - owner: "vlue-c" + url: "https://github.com/Azure/AzureChestXRay" + owner: "Azure" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 76 + description: "Intelligent disease prediction system that can help radiologists review Chest X-rays more efficiently." } repositories: { - url: "https://github.com/jsr66/Machine-Learning-Phases-of-Matter-with-Discriminative-Localization-" - owner: "jsr66" - framework: FRAMEWORK_OTHERS + url: "https://github.com/zdcuob/Fully-Convlutional-Neural-Networks-for-state-of-the-art-time-series-classification-" + owner: "zdcuob" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 } repositories: { - url: "https://github.com/Seb-Good/deep_ecg" - owner: "Seb-Good" + url: "https://github.com/chandrakantkhandelwal/PracticeCodes" + owner: "chandrakantkhandelwal" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 53 - description: "A library for classifying single-lead ECG waveforms as either Normal Sinus Rhythm, Atrial Fibrillation, or Other Rhythm." } repositories: { - url: "https://github.com/Tetsuya-Nishikawa/CAM" - owner: "Tetsuya-Nishikawa" + url: "https://github.com/fatLime/Predict-Lung-Disease" + owner: "fatLime" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + } + repositories: { + url: "https://github.com/philipperemy/tensorflow-class-activation-mapping" + owner: "philipperemy" framework: FRAMEWORK_TENSORFLOW - description: "CAM(class activation map)の実験(https://arxiv.org/pdf/1512.04150.pdf)" + number_of_stars: 150 + description: "Learning Deep Features for Discriminative Localization (2016)" } repositories: { - url: "https://github.com/FelixFu520/CAM-Cifar10" - owner: "FelixFu520" + url: "https://github.com/airalcorn2/strike-with-a-pose" + owner: "airalcorn2" framework: FRAMEWORK_PYTORCH + number_of_stars: 71 + description: "A simple GUI tool for generating adversarial poses of objects." 
} repositories: { - url: "https://github.com/metalbubble/CAM" - owner: "metalbubble" + url: "https://github.com/tensorpack/tensorpack/tree/master/examples/Saliency" + owner: "examples" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1508 - description: "Class Activation Mapping" + number_of_stars: 6061 + description: "A Neural Net Training Interface on TensorFlow, with focus on speed + flexibility" } methods: { name: "Average Pooling" @@ -4726,7 +4753,7 @@ pr_id_to_video: { video_id: "-Z1NIzLxgRU" video_title: "PR047: Learning Deep Features for Discriminative Localization" number_of_likes: 32 - number_of_views: 2238 + number_of_views: 2255 published_date: { seconds: 1510500873 } @@ -4759,7 +4786,7 @@ pr_id_to_video: { video_id: "RlAgB0Ooxaw" video_title: "PR-048: Towards Principled Methods for Training Generative Adversarial Networks" number_of_likes: 19 - number_of_views: 1721 + number_of_views: 1740 published_date: { seconds: 1510652207 } @@ -4788,71 +4815,70 @@ pr_id_to_video: { authors: "Lukasz Kaiser" authors: "Illia Polosukhin" repositories: { - url: "https://github.com/bangoc123/transformer" - owner: "bangoc123" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 - description: "Build English-Vietnamese machine translation with ProtonX Transformer. :D" - } - repositories: { - url: "https://github.com/brainsqueeze/text2vec" - owner: "brainsqueeze" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Contextual embedding for text blobs." + url: "https://github.com/drumpt/Transformer" + owner: "drumpt" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/maroxtn/tun-sentiment" - owner: "maroxtn" + url: "https://github.com/UdbhavPrasad072300/Transformer-Implementations" + owner: "UdbhavPrasad072300" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "My solution in Zindi Tunisian Sentiment Analysis competition. Ranked #1st." + number_of_stars: 18 + description: "Library - Vanilla, ViT, DeiT, BERT, GPT" } repositories: { - url: "https://github.com/han-shi/SparseBERT" - owner: "han-shi" + url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" + owner: "xmu-xiaoma666" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 1492 + description: "🍀 Pytorch implementation of various Attention Mechanisms, MLP, Re-parameter, Convolution, which is helpful to further understand papers.⭐⭐⭐" } repositories: { - url: "https://github.com/rupakdas18/SuperGlue-tasks-using-BERT" - owner: "rupakdas18" + url: "https://github.com/facebookresearch/fairseq" + owner: "facebookresearch" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "In this project we have implemented 2 SuperGlue tasks (RTE and BOOLQ)." + number_of_stars: 3738 + description: "Facebook AI Research Sequence-to-Sequence Toolkit" } repositories: { - url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/machine_translation/transformer" - owner: "machine_translation" - framework: FRAMEWORK_OTHERS - number_of_stars: 1489 - description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." 
+ url: "https://github.com/Matthewdowney18/Transformer_Dialogue" + owner: "Matthewdowney18" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A transformer dialogue model capable of chatting with a human via Telegram" } repositories: { - url: "https://github.com/mitran27/Attention-is-all-you-Need" - owner: "mitran27" - framework: FRAMEWORK_OTHERS - description: "building the Transformer (new world of NLP) completely from scratch" + url: "https://github.com/guillaume-chevalier/Linear-Attention-Recurrent-Neural-Network" + owner: "guillaume-chevalier" + framework: FRAMEWORK_PYTORCH + number_of_stars: 120 + description: "A recurrent attention module consisting of an LSTM cell which can query its own past cell states by the means of windowed multi-head attention. The formulas are derived from the BN-LSTM and the Transformer Network. The LARNN cell with attention can be easily used inside a loop on the cell state, just like any other RNN. (LARNN)" } repositories: { - url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" - owner: "xmu-xiaoma666" + url: "https://github.com/youwontunderstandthis/zhen-trans" + owner: "youwontunderstandthis" framework: FRAMEWORK_PYTORCH - number_of_stars: 840 - description: "🍀 Pytorch implementation of various Attention Mechanisms, MLP, Re-parameter, Convolution, which is helpful to further understand papers.⭐⭐⭐" + description: "zh-en translation with transformer" + } + repositories: { + url: "https://github.com/TSLNIHAOGIT/bert_run" + owner: "TSLNIHAOGIT" + framework: FRAMEWORK_TENSORFLOW + description: "can run on google drive notebook" } repositories: { - url: "https://github.com/stevinc/Transformer_Timeseries" - owner: "stevinc" + url: "https://github.com/kolloldas/torchnlp" + owner: "kolloldas" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Pytorch code for Google's Temporal Fusion Transformer" + number_of_stars: 238 + description: "Easy to use NLP library built on PyTorch and TorchText" } repositories: { - url: "https://github.com/xydaytoy/BMI-NMT" - owner: "xydaytoy" + url: "https://github.com/pomonam/AttentionCluster" + owner: "pomonam" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 + number_of_stars: 37 + description: "TensorFlow Implementation of \"Attention Clusters: Purely Attention Based Local Feature Integration for Video Classification\"" } methods: { name: "Residual Connection" @@ -4908,7 +4934,7 @@ pr_id_to_video: { video: { video_id: "6zGgVIlStXs" video_title: "PR-049: Attention is All You Need" - number_of_views: 7289 + number_of_views: 7349 published_date: { seconds: 1512304902 } @@ -4935,51 +4961,52 @@ pr_id_to_video: { authors: "Wai-kin Wong" authors: "Wang-chun Woo" repositories: { - url: "https://github.com/georgeyiasemis/2D-Convolutional-Recurrent-Neural-Networks-with-PyTorch" - owner: "georgeyiasemis" + url: "https://github.com/trichtu/ConvLSTM-RAU-net" + owner: "trichtu" framework: FRAMEWORK_PYTORCH - description: "2D Convolutional Recurrent Neural Networks implemented in PyTorch" + number_of_stars: 22 + description: "Spatial-temperal Prediction Model based on history observation and WRF numerical prediction " } repositories: { - url: "https://github.com/czifan/ConvLSTM.pytorch" - owner: "czifan" + url: "https://github.com/ndrplz/ConvLSTM_pytorch" + owner: "ndrplz" framework: FRAMEWORK_PYTORCH - number_of_stars: 27 + number_of_stars: 884 + description: "Implementation of Convolutional LSTM in PyTorch." 
} repositories: { - url: "https://github.com/Tetsuya-Nishikawa/ConvLSTM_DEMO" - owner: "Tetsuya-Nishikawa" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "自作ConvLSTMデモ" + url: "https://github.com/automan000/Convolution_LSTM_pytorch" + owner: "automan000" + framework: FRAMEWORK_PYTORCH + number_of_stars: 682 + description: "Multi-layer convolutional LSTM with Pytorch" } repositories: { url: "https://github.com/rogertrullo/pytorch_convlstm" owner: "rogertrullo" framework: FRAMEWORK_PYTORCH - number_of_stars: 128 + number_of_stars: 130 description: "convolutional lstm implementation in pytorch" } repositories: { - url: "https://github.com/trichtu/ConvLSTM-RAU-net" - owner: "trichtu" - framework: FRAMEWORK_PYTORCH - number_of_stars: 20 - description: "Spatial-temperal Prediction Model based on history observation and WRF numerical prediction " + url: "https://github.com/Tetsuya-Nishikawa/ConvLSTM_DEMO" + owner: "Tetsuya-Nishikawa" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "自作ConvLSTMデモ" } repositories: { - url: "https://github.com/ndrplz/ConvLSTM_pytorch" - owner: "ndrplz" + url: "https://github.com/czifan/ConvLSTM.pytorch" + owner: "czifan" framework: FRAMEWORK_PYTORCH - number_of_stars: 851 - description: "Implementation of Convolutional LSTM in PyTorch." + number_of_stars: 30 } repositories: { - url: "https://github.com/automan000/Convolution_LSTM_pytorch" - owner: "automan000" + url: "https://github.com/georgeyiasemis/2D-Convolutional-Recurrent-Neural-Networks-with-PyTorch" + owner: "georgeyiasemis" framework: FRAMEWORK_PYTORCH - number_of_stars: 675 - description: "Multi-layer convolutional LSTM with Pytorch" + number_of_stars: 1 + description: "2D Convolutional Recurrent Neural Networks implemented in PyTorch" } methods: { name: "Tanh Activation" @@ -5010,8 +5037,8 @@ pr_id_to_video: { video: { video_id: "3cFfCM4CXws" video_title: "PR-050: Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting" - number_of_likes: 38 - number_of_views: 6999 + number_of_likes: 40 + number_of_views: 7093 published_date: { seconds: 1511707163 } @@ -5034,77 +5061,74 @@ pr_id_to_video: { authors: "Mehdi Mirza" authors: "Simon Osindero" repositories: { - url: "https://github.com/Sinestro38/qosf-qgan" - owner: "Sinestro38" + url: "https://github.com/HyperZealot/mxnet-cgan" + owner: "HyperZealot" + framework: FRAMEWORK_TENSORFLOW + description: "CGAN(https://arxiv.org/pdf/1411.1784.pdf) implementation in MXNet. Refered to https://github.com/znxlwm/pytorch-MNIST-CelebA-cGAN-cDCGAN" + } + repositories: { + url: "https://github.com/ajemerson/Recipe-Recommendation-System" + owner: "ajemerson" framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Exploring learnability and optimal hyperparameters of various quantum generative adversarial networks and quantum neural networks using Pennylane. " + number_of_stars: 18 + description: "[CSC722 Project] Data-driven recipe recommendation system using web-scraped recipe data." 
} repositories: { - url: "https://github.com/asiltureli/gan-in-colab" - owner: "asiltureli" + url: "https://github.com/MakeDirtyCode/cDCGAN-celebA-pytorch" + owner: "MakeDirtyCode" framework: FRAMEWORK_PYTORCH - description: "GAN implementations on Google Colab" + number_of_stars: 4 } repositories: { - url: "https://github.com/AshishSingh2261/GAN" - owner: "AshishSingh2261" + url: "https://github.com/Murali81/InfoGAN" + owner: "Murali81" framework: FRAMEWORK_OTHERS - description: "Contains code for different types of GANs trained on different datasets." - } - repositories: { - url: "https://github.com/YigitGunduc/Conditional-GANs-CGANs" - owner: "YigitGunduc" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Conditional Generative Adversarial Networks(cgans) to convert text to image implemented in Python and TensorFlow & Keras" + description: "A demo script explaining InfoGAN on MNIST Dataset" } repositories: { - url: "https://github.com/kynk94/TF2-Image-Generation" - owner: "kynk94" + url: "https://github.com/ImagingLab/Colorizing-with-GANs" + owner: "ImagingLab" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" + number_of_stars: 218 + description: "Grayscale Image Colorization with Generative Adversarial Networks. https://arxiv.org/abs/1803.05400" } repositories: { - url: "https://github.com/otepencelik/GAN-Artwork-Generation" - owner: "otepencelik" + url: "https://github.com/NathanDeMaria/AugmentedCycleGAN" + owner: "NathanDeMaria" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 11 + description: "A presentation on Augmented CycleGAN and the papers that lead up to it" } repositories: { - url: "https://github.com/MCLYang/RhythmGAN_pytorch" - owner: "MCLYang" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/jettdlee/vis_tac_cross_modal" + owner: "jettdlee" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 3 - description: "The pytorch implementation for RhythmGAN" } repositories: { - url: "https://github.com/Lornatang/CGAN-PyTorch" - owner: "Lornatang" - framework: FRAMEWORK_PYTORCH - description: "Simple implementation of conditional general adverse nets in pytorch machine learning framework" + url: "https://github.com/dineshzende/awesome-deep-learning-resources" + owner: "dineshzende" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 } repositories: { - url: "https://github.com/jamesloyys/PyTorch-Lightning-GAN" - owner: "jamesloyys" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Implementations of various GAN architectures using PyTorch Lightning" + url: "https://github.com/ajemerson/CSC_722_Project" + owner: "ajemerson" + framework: FRAMEWORK_OTHERS + number_of_stars: 18 + description: "[CSC722 Project] Data-driven recipe recommendation system using web-scraped recipe data." } repositories: { - url: "https://github.com/gordicaleksa/pytorch-gans" - owner: "gordicaleksa" - framework: FRAMEWORK_PYTORCH - number_of_stars: 292 - description: "My implementation of various GAN (generative adversarial networks) architectures like vanilla GAN (Goodfellow et al.), cGAN (Mirza et al.), DCGAN (Radford et al.), etc." 
+ url: "https://github.com/vohoaiviet/Advanced-Deep-Learning-with-Keras" + owner: "vohoaiviet" + framework: FRAMEWORK_OTHERS } } video: { video_id: "iCgT8G4PkqI" video_title: "PR-051: Conditional Generative Adversarial Nets" - number_of_likes: 25 - number_of_views: 3518 + number_of_likes: 26 + number_of_views: 3614 published_date: { seconds: 1512310569 } @@ -5161,79 +5185,79 @@ pr_id_to_video: { authors: "Devi Parikh" authors: "Dhruv Batra" repositories: { - url: "https://github.com/AlanYangYi/PhotoStyleRecongnizer" - owner: "AlanYangYi" + url: "https://github.com/hs2k/pytorch-smoothgrad" + owner: "hs2k" framework: FRAMEWORK_PYTORCH - description: "Recognize the photo style using CNN" + number_of_stars: 123 + description: "SmoothGrad implementation in PyTorch " } repositories: { - url: "https://github.com/CMU-CREATE-Lab/deep-smoke-machine" - owner: "CMU-CREATE-Lab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 62 - description: "Deep learning models and dataset for recognizing industrial smoke emissions" + url: "https://github.com/fitushar/3D-Grad-CAM" + owner: "fitushar" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 21 + description: "This repo contains Grad-CAM for 3D volumes." } repositories: { - url: "https://github.com/novice03/timm-vis" - owner: "novice03" + url: "https://github.com/thtang/CheXNet-with-localization" + owner: "thtang" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Visualizer for PyTorch image models" + number_of_stars: 247 + description: "Weakly Supervised Learning for Findings Detection in Medical Images" } repositories: { - url: "https://github.com/sauravmishra1710/EXPLAINABLE-AI---Skin-Cancer-Detection-explained-with-GRADCAM" - owner: "sauravmishra1710" - framework: FRAMEWORK_TENSORFLOW - description: "Diagnose the presence of skin cancer in a person using CNN and as well explain what led the CNN to arrive at the decision. Visual explanations are made utilizing the Gradient-weighted Class Activation Mapping (Grad-CAM), the gradients flowing into the final convolutional layer to produce a coarse localization map highlighting the important regions in the image for considered for arriving at the decision. The original paper for GRADCAM can be found @ https://arxiv.org/abs/1610.02391" + url: "https://github.com/kazuto1011/grad-cam-pytorch" + owner: "kazuto1011" + framework: FRAMEWORK_PYTORCH + number_of_stars: 640 + description: "PyTorch implementation of Grad-CAM, vanilla/guided backpropagation, deconvnet, and occlusion sensitivity maps" } repositories: { - url: "https://github.com/xn1997/pytorch-grad-cam" - owner: "xn1997" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/pasrichashivam/Convnets_Grad-CAM_Grad-CAM-PlusPlus_Keras_tensorflow-2" + owner: "pasrichashivam" + framework: FRAMEWORK_OTHERS number_of_stars: 1 - description: "特征图可视化(个人修改版)" + description: "Explainable AI: For Computer Vision Tasks Using CAM, GradCAM & GradCAM++" } repositories: { - url: "https://github.com/priyavrat-misra/xrays-and-gradcam" - owner: "priyavrat-misra" + url: "https://github.com/Cloud-CV/Grad-CAM" + owner: "Cloud-CV" framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "Classification and Gradient-based Localization of Chest Radiographs using PyTorch." 
+ number_of_stars: 91 + description: ":rainbow: :camera: Gradient-weighted Class Activation Mapping (Grad-CAM) Demo" } repositories: { - url: "https://github.com/jordan-bird/synthetic-fruit-image-generator" - owner: "jordan-bird" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Use a CGAN to generate synthetic images of healthy and unhealthy lemons" + url: "https://github.com/Murali81/Grad-CAM" + owner: "Murali81" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Grad-CAM implementation in Keras for saliency maps" } repositories: { - url: "https://github.com/FrancescoSaverioZuppichini/A-journey-into-Convolutional-Neural-Network-visualization-" - owner: "FrancescoSaverioZuppichini" - framework: FRAMEWORK_PYTORCH - number_of_stars: 188 - description: "A journey into Convolutional Neural Network visualization " + url: "https://github.com/aviatesk/street-feature-analysis" + owner: "aviatesk" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Classifying and analysing streets in the world 10 beautiful cities" } repositories: { - url: "https://github.com/samson6460/tf_keras_gradcamplusplus" - owner: "samson6460" + url: "https://github.com/wawaku/grad-cam-keras" + owner: "wawaku" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "tensorflow.keras implementation of gradcam and gradcam++" + number_of_stars: 16 + description: "Grad-CAM implementation in Keras" } repositories: { - url: "https://github.com/dtanoglidis/DeepShadows" - owner: "dtanoglidis" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Repository for the project \"DeepShadows: Separating LSBGs from artifacts using Deep Learning\"" + url: "https://github.com/chenbys/GradCAM" + owner: "chenbys" + framework: FRAMEWORK_PYTORCH } } video: { video_id: "faGsrPX1yFM" video_title: "PR-053: Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization" number_of_likes: 35 - number_of_views: 6509 + number_of_views: 6576 published_date: { seconds: 1512915707 } @@ -5257,41 +5281,6 @@ pr_id_to_video: { authors: "Xinyu Zhou" authors: "Mengxiao Lin" authors: "Jian Sun" - repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." 
- } - repositories: { - url: "https://github.com/tensorpack/tensorpack/tree/master/examples/ImageNetModels" - owner: "examples" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6048 - description: "A Neural Net Training Interface on TensorFlow, with focus on speed + flexibility" - } - repositories: { - url: "https://github.com/afzalahmad0203/Tensorflow-Shufflenet" - owner: "afzalahmad0203" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" - } - repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" - framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" - } - repositories: { - url: "https://github.com/eogussla12/Shufflenet_CIFAR10_Pytorch" - owner: "eogussla12" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Pytorch-Shufflenet-CIFAR10" - } repositories: { url: "https://github.com/MrRen-sdhm/Embedded_Multi_Object_Detection_CNN" owner: "MrRen-sdhm" @@ -5313,6 +5302,13 @@ pr_id_to_video: { number_of_stars: 11 description: "This repo contains code for *Merging and Evolution: Improving Convolutional Neural Networks for Mobile Applications*." } + repositories: { + url: "https://github.com/afzalahmad0203/Tensorflow-Shufflenet" + owner: "afzalahmad0203" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" + } repositories: { url: "https://github.com/europa1610/Tensorflow-Shufflenet" owner: "europa1610" @@ -5326,6 +5322,33 @@ pr_id_to_video: { framework: FRAMEWORK_OTHERS description: "Numpy implementation of shufflenet based on https://arxiv.org/abs/1707.01083" } + repositories: { + url: "https://github.com/minhto2802/keras-shufflenet" + owner: "minhto2802" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + } + repositories: { + url: "https://github.com/marload/ConvNets-TensorFlow2" + owner: "marload" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 64 + description: "⛵️ Implementation a variety of popular Image Classification Models using TensorFlow2. 
[ResNet, GoogLeNet, VGG, Inception-v3, Inception-v4, MobileNet, MobileNet-v2, ShuffleNet, ShuffleNet-v2, etc...]" + } + repositories: { + url: "https://github.com/zjZSTU/LightWeightCNN" + owner: "zjZSTU" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "轻量化卷积神经网络实现(SqueezeNet/MobileNet/ShuffleNet/MnasNet)" + } + repositories: { + url: "https://github.com/afzalahmad0203/tf_shufflenet" + owner: "afzalahmad0203" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Shufflenet implementation in tensorflow based on https://arxiv.org/abs/1707.01083" + } methods: { name: "Average Pooling" full_name: "Average Pooling" @@ -5380,8 +5403,8 @@ pr_id_to_video: { video: { video_id: "pNuBdj53Hbc" video_title: "PR-054: ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" - number_of_likes: 52 - number_of_views: 6206 + number_of_likes: 53 + number_of_views: 6261 published_date: { seconds: 1513005030 } @@ -5405,68 +5428,71 @@ pr_id_to_video: { authors: "Kyunghyun Cho" authors: "Yoshua Bengio" repositories: { - url: "https://github.com/dl4nlp-tuda2021/deep-learning-for-nlp-lectures" - owner: "dl4nlp-tuda2021" - framework: FRAMEWORK_PYTORCH - number_of_stars: 81 - description: "Deep Learning for Natural Language Processing - Lectures 2021" + url: "https://github.com/mp2893/gram" + owner: "mp2893" + framework: FRAMEWORK_OTHERS + number_of_stars: 198 + description: "Graph-based Attention Model" } repositories: { - url: "https://github.com/prakhargurawa/Neural-Machine-Translation-Keras-Attention" - owner: "prakhargurawa" + url: "https://github.com/eske/seq2seq" + owner: "eske" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Machine translation using LSTM Model. Created two translation models with/without attention mechanisms for translation between French-English and German-English." + number_of_stars: 369 + description: "Attention-based sequence to sequence learning" } repositories: { - url: "https://github.com/AMNAALMGLY/NLP" - owner: "AMNAALMGLY" - framework: FRAMEWORK_OTHERS + url: "https://github.com/THUNLP-MT/THUMT" + owner: "THUNLP-MT" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 584 + description: "An open-source neural machine translation toolkit developed by Tsinghua Natural Language Processing Group" } repositories: { - url: "https://github.com/prakhargurawa/Neural-Machine-Translation-Keras-German-English" - owner: "prakhargurawa" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Machine translation using LSTM Model. Created two translation models with/without attention mechanisms for translation between French-English and German-English." 
+ url: "https://github.com/b-etienne/Seq2seq-PyTorch" + owner: "b-etienne" + framework: FRAMEWORK_PYTORCH + number_of_stars: 58 } repositories: { - url: "https://github.com/millenialSpirou/ift6010" - owner: "millenialSpirou" + url: "https://github.com/DCYN/Ramdomized-Clinical-Trail-Classification" + owner: "DCYN" framework: FRAMEWORK_TENSORFLOW + description: "Applying deeplearning + svm classifier to get randomized clinical trails" } repositories: { - url: "https://github.com/yinghao1019/NLP_and_DL_practice" - owner: "yinghao1019" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "This repository is used for NLP Model practice and learning" + url: "https://github.com/SwordYork/DCNMT" + owner: "SwordYork" + framework: FRAMEWORK_OTHERS + number_of_stars: 71 + description: "Deep Character-Level Neural Machine Translation" } repositories: { - url: "https://github.com/tree-park/kor-to-eng-translation" - owner: "tree-park" + url: "https://github.com/YvesWang/Machine_Translation_NLP" + owner: "YvesWang" framework: FRAMEWORK_PYTORCH - description: "Translator by transforemer and seq2seq (with attention mechanism) - Pytorch" + number_of_stars: 2 } repositories: { - url: "https://github.com/hiun/learning-transformers" - owner: "hiun" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Transformers Tutorials with Open Source Implementations" + url: "https://github.com/sunnysinghnitb/text-corrector-software" + owner: "sunnysinghnitb" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Text Corrector Software uses TensorFlow to train sequence-to-sequence models that are capable of automatically correcting small grammatical errors in conversational written English (e.g. SMS messages)" } repositories: { - url: "https://github.com/xhlulu/arxiv-assistant" - owner: "xhlulu" + url: "https://github.com/farizrahman4u/seq2seq" + owner: "farizrahman4u" framework: FRAMEWORK_OTHERS - description: "A simple webapp for helping you navigate Arxiv.org" + number_of_stars: 3086 + description: "Sequence to Sequence Learning with Keras" } repositories: { - url: "https://github.com/xingniu/sockeye" - owner: "xingniu" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "Sequence-to-sequence framework with a focus on Neural Machine Translation based on Apache MXNet" + url: "https://github.com/atpaino/deep-text-corrector" + owner: "atpaino" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1179 + description: "Deep learning models trained to correct input errors in short, message-like text" } methods: { name: "Tanh Activation" @@ -5483,7 +5509,7 @@ pr_id_to_video: { video_id: "upskBSbA9cA" video_title: "PR-055: Neural Machine Translation by Jointly Learning to Align and Translate" number_of_likes: 27 - number_of_views: 2854 + number_of_views: 2870 published_date: { seconds: 1513516897 } @@ -5507,77 +5533,77 @@ pr_id_to_video: { authors: "Nicholas Frosst" authors: "Geoffrey E Hinton" repositories: { - url: "https://github.com/JoelClingempeel/ExecutiveControl" - owner: "JoelClingempeel" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "This is a work in progress which aims to train a neural network inspired by the interaction between the cortex and the basal ganglia. 
" + url: "https://github.com/Ugenteraan/CapsNet-MNIST-TF" + owner: "Ugenteraan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Capsule network in Tensorflow for MNIST" } repositories: { - url: "https://github.com/dudyu/capsnet" - owner: "dudyu" + url: "https://github.com/DanielLongo/CapsGAN" + owner: "DanielLongo" framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Capsule Network based GAN" } repositories: { - url: "https://github.com/Oushesh/CapsClassifigner" - owner: "Oushesh" + url: "https://github.com/akanimax/capsule-network-TensorFlow" + owner: "akanimax" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Classifigner on German Sign Dataset" + number_of_stars: 6 + description: "The impending concept of capsule networks has finally arrived at arXiv. link to the publication -> https://arxiv.org/abs/1710.09829 . In this repository, I'll create an implementation using TensorFlow from scratch as an exercise." } repositories: { - url: "https://github.com/Egesabanci/capsuleNetworks" - owner: "Egesabanci" - framework: FRAMEWORK_TENSORFLOW - description: ":pill: CapsNets implementation according to the paper: Dynamic Routing Between Capsules - Sara Sabour, Nicholas Frosst, Geoffrey E Hinton" + url: "https://github.com/JSerowik/Masters_Thesis" + owner: "JSerowik" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Repo for my M.S. thesis on implementing Capsule Neural Networks in both positions in a GAN model" } repositories: { - url: "https://github.com/ecstayalive/Degenerate-capsule-neural-network" - owner: "ecstayalive" + url: "https://github.com/lab-ml/nn/tree/master/labml_nn/capsule_networks" + owner: "labml_nn" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "退化胶囊神经网络是通过改变极少的胶囊神经网络结构并将其应用与一些对位置要求不敏感的一些识别场合,但是保留了其快速泛化的特性" + number_of_stars: 3494 + description: "🧑‍🏫 Implementations/tutorials of deep learning papers with side-by-side notes 📝; including transformers (original, xl, switch, feedback, vit), optimizers (adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), 🎮 reinforcement learning (ppo, dqn), capsnet, distillation, etc. 🧠" } repositories: { - url: "https://github.com/razvanalex/CapsLayer" - owner: "razvanalex" - framework: FRAMEWORK_TENSORFLOW - description: "CapsLayer: An advanced library for capsule theory" + url: "https://github.com/shashankmanjunath/capsnet" + owner: "shashankmanjunath" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "A simple implementation of a capsule network" } repositories: { - url: "https://github.com/naturomics/CapsLayer" - owner: "naturomics" + url: "https://github.com/dshelukh/CapsuleLearner" + owner: "dshelukh" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 351 - description: "CapsLayer: An advanced library for capsule theory" + number_of_stars: 1 + description: "Education project to try capsules in neural nets" } repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/capsule_networks" - owner: "labml_nn" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." 
+ url: "https://github.com/Suraj-Panwar/Capsule_Network_based_Deep_Q_learning" + owner: "Suraj-Panwar" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/jelifysh/Capsule-Networks" - owner: "jelifysh" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Pytorch Implementation of Capsule Networks" + url: "https://github.com/dolaram/Capsule-Network-for-MNIST-classification" + owner: "dolaram" + framework: FRAMEWORK_TENSORFLOW + description: "Simple Capsule Neural Network for MNIST classification" } repositories: { - url: "https://github.com/EscVM/Efficient-CapsNet" - owner: "EscVM" + url: "https://github.com/im-ant/Capsules-Guys" + owner: "im-ant" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 128 - description: "Official TensorFlow code for the forthcoming paper \"Efficient-CapsNet: Capsule Network with Self-Attention Routing\"." + description: "COMP551 Fall 2018, Final Project" } } video: { video_id: "_YT_8CT2w_Q" video_title: "PR-056: Capsule Network" - number_of_likes: 68 - number_of_views: 5773 + number_of_likes: 67 + number_of_views: 5800 published_date: { seconds: 1513522378 } @@ -5602,68 +5628,67 @@ pr_id_to_video: { authors: "Piotr Dollár" authors: "Ross Girshick" repositories: { - url: "https://github.com/alexander-pv/maskrcnn_tf2" - owner: "alexander-pv" - framework: FRAMEWORK_TENSORFLOW - description: "Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow V2 with ONNX and TensorRT optimization option. " - } - repositories: { - url: "https://github.com/TejasBajania/Mtech_pro" - owner: "TejasBajania" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/SonginCV/GMPHD_MAF" - owner: "SonginCV" + url: "https://github.com/houssemjebari/Fruit-Detection" + owner: "houssemjebari" framework: FRAMEWORK_OTHERS - number_of_stars: 10 - description: "The official implementation of the GMPHD_MAF Tracker" - } - repositories: { - url: "https://github.com/miaohua1982/simple_fasterrcnn_pytorch" - owner: "miaohua1982" - framework: FRAMEWORK_PYTORCH number_of_stars: 1 } repositories: { - url: "https://github.com/tuguldurs/vivus" - owner: "tuguldurs" + url: "https://github.com/phykn/film-defect-detection" + owner: "phykn" framework: FRAMEWORK_PYTORCH number_of_stars: 2 - description: "venous intravascular ultrasound image processing" } repositories: { - url: "https://github.com/SonginCV/GMPHD_SAF" - owner: "SonginCV" - framework: FRAMEWORK_OTHERS - number_of_stars: 10 - description: "The official implementation of the GMPHD_MAF Tracker" + url: "https://github.com/matterport/Mask_RCNN" + owner: "matterport" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 20411 + description: "Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow" } repositories: { - url: "https://github.com/alexalm4190/Mask_RCNN-Vizzy_Hand" - owner: "alexalm4190" + url: "https://github.com/fdac18/ForensicImages" + owner: "fdac18" framework: FRAMEWORK_OTHERS number_of_stars: 1 + description: "Final project for DA" } repositories: { - url: "https://github.com/deolipankaj/Stone_Detection_MRCNN" - owner: "deolipankaj" + url: "https://github.com/kbardool/mrcnn3" + owner: "kbardool" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Stone detection in an off-road environment with Mask R-CNN" + description: "Contextual Inference version 2" } repositories: { - url: "https://github.com/EmGarr/kerod" - owner: "EmGarr" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 36 - description: 
"DETR - Faster RCNN implementation in tensorflow 2" + url: "https://github.com/noelcodes/Mask_RCNN" + owner: "noelcodes" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Exploring Mask_RCNN. Credits to matterport for codes and mark jay for the tutorial. " } repositories: { - url: "https://github.com/polospeter/TensorFlow-Advanced-Techniques-Specialization" - owner: "polospeter" + url: "https://github.com/stanleycelestin1/AirsimDetectron" + owner: "stanleycelestin1" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 + number_of_stars: 2 + description: "Pipelining Object Detection with a Simulated Enviroment" + } + repositories: { + url: "https://github.com/rickyHong/py-faster-rcnn-repl-cudnn5-support" + owner: "rickyHong" + framework: FRAMEWORK_OTHERS + description: "git을 사용하다 브랜치 전체를 clone하지 않고 특정 브랜치 하나만 clone하는 것이 가능하다. 특히 브랜치가 많은 경우 이 방법을 사용할 수 있다. git clone -b {branch_name} --single-branch {저장소 URL} ex) git clone -b javajigi --single-branch https://github.com/javajigi/java-racingcar 위와 같이 실행하면 java-racingcar의 javajigi branch만 clone할 수 있다." + } + repositories: { + url: "https://github.com/evaristr/py-faster_rcnn" + owner: "evaristr" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/charlesYangM/py-faster-rcnn-80.28" + owner: "charlesYangM" + framework: FRAMEWORK_OTHERS + description: "faster-rcnn" } methods: { name: "RoIAlign" @@ -5719,8 +5744,8 @@ pr_id_to_video: { video: { video_id: "RtSZALC9DlU" video_title: "PR-057: Mask R-CNN" - number_of_likes: 135 - number_of_views: 11179 + number_of_likes: 138 + number_of_views: 11398 published_date: { seconds: 1515330928 } @@ -5745,7 +5770,7 @@ pr_id_to_video: { video: { video_id: "7fIAdhl0KYc" video_title: "PR-058: The Consciousness Prior" - number_of_views: 1160 + number_of_views: 1175 published_date: { seconds: 1515333966 } @@ -5769,31 +5794,6 @@ pr_id_to_video: { authors: "Tao Lei" authors: "Regina Barzilay" authors: "Tommi Jaakkola" - repositories: { - url: "https://github.com/jpark621/language-style-transfer" - owner: "jpark621" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 - description: "Reimplementation of NLP Style Transfer from Non-parallel Text with Adversarial Alignment (https://arxiv.org/abs/1705.09655)" - } - repositories: { - url: "https://github.com/jishavm/TextStyleTransfer" - owner: "jishavm" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/kyuer/language-style-transfer" - owner: "kyuer" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "\"Style Transfer from Non-Parallel Text by Cross-Alignment\". Tianxiao Shen, Tao Lei, Regina Barzilay, and Tommi Jaakkola. NIPS 2017." 
- } - repositories: { - url: "https://github.com/kaletap/language-style-transfer-pytorch" - owner: "kaletap" - framework: FRAMEWORK_TENSORFLOW - description: "Experiments to rewrite style transfer code from tensorflow to pytorch (not finished yet)" - } repositories: { url: "https://github.com/qfzhu/st" owner: "qfzhu" @@ -5804,7 +5804,7 @@ pr_id_to_video: { url: "https://github.com/shentianxiao/language-style-transfer" owner: "shentianxiao" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 497 + number_of_stars: 500 } repositories: { url: "https://github.com/mariob6/style_text" @@ -5831,12 +5831,36 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW description: "Final Project for Deep Learning on Text Style Transfer" } + repositories: { + url: "https://github.com/bywords/lang-style-transfer-legacy" + owner: "bywords" + framework: FRAMEWORK_TENSORFLOW + description: "\"Style Transfer from Non-Parallel Text by Cross-Alignment\"" + } + repositories: { + url: "https://github.com/kaletap/nlp-style-transfer" + owner: "kaletap" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/kaletap/language-style-transfer-pytorch" + owner: "kaletap" + framework: FRAMEWORK_TENSORFLOW + description: "Experiments to rewrite style transfer code from tensorflow to pytorch (not finished yet)" + } + repositories: { + url: "https://github.com/kyuer/language-style-transfer" + owner: "kyuer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "\"Style Transfer from Non-Parallel Text by Cross-Alignment\". Tianxiao Shen, Tao Lei, Regina Barzilay, and Tommi Jaakkola. NIPS 2017." + } } video: { video_id: "w-P2V2LlrHg" video_title: "PR-059: Style Transfer from Non-Parallel Text by Cross-Alignment" number_of_likes: 13 - number_of_views: 1057 + number_of_views: 1061 published_date: { seconds: 1515977170 } @@ -5860,24 +5884,6 @@ pr_id_to_video: { authors: "Lina Yao" authors: "Aixin Sun" authors: "Yi Tay" - repositories: { - url: "https://github.com/philippe-gagne/treasure-boxd" - owner: "philippe-gagne" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Treasure-boxd is a tool that predicts movie preferences and provides film recommendations based on users' Letterboxd data." - } - repositories: { - url: "https://github.com/YichenLin/MATH-80600A-Project" - owner: "YichenLin" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/abmitra84/recommender_system" - owner: "abmitra84" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } repositories: { url: "https://github.com/anuragreddygv323/Important-stuff" owner: "anuragreddygv323" @@ -5911,12 +5917,30 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW description: "Collection of codes and papers in the topic of recommender system" } + repositories: { + url: "https://github.com/abmitra84/recommender_system" + owner: "abmitra84" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/YichenLin/MATH-80600A-Project" + owner: "YichenLin" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/philippe-gagne/treasure-boxd" + owner: "philippe-gagne" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Treasure-boxd is a tool that predicts movie preferences and provides film recommendations based on users' Letterboxd data." 
+ } } video: { video_id: "V6zixdCIOqw" video_title: "PR-060: Deep Neural Networks for YouTube Recommendations" - number_of_likes: 50 - number_of_views: 4028 + number_of_likes: 51 + number_of_views: 4093 published_date: { seconds: 1516540254 } @@ -5941,16 +5965,6 @@ pr_id_to_video: { authors: "Moritz Hardt" authors: "Benjamin Recht" authors: "Oriol Vinyals" - repositories: { - url: "https://github.com/randyshee/TensorFlow-Projects" - owner: "randyshee" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/iwzy7071/graph_neural_network" - owner: "iwzy7071" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/2xic/notebooks" owner: "2xic" @@ -5976,14 +5990,14 @@ pr_id_to_video: { url: "https://github.com/glouppe/info8010-deep-learning" owner: "glouppe" framework: FRAMEWORK_PYTORCH - number_of_stars: 666 + number_of_stars: 670 description: "Lectures for INFO8010 - Deep Learning, ULiège" } repositories: { url: "https://github.com/jessemzhang/dl_spectral_normalization" owner: "jessemzhang" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 + number_of_stars: 11 } repositories: { url: "https://github.com/KellyHwong/rethinking_generalization" @@ -5991,12 +6005,22 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW description: "UNDERSTANDING DEEP LEARNING REQUIRES RETHINKING GENERALIZATION" } + repositories: { + url: "https://github.com/iwzy7071/graph_neural_network" + owner: "iwzy7071" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/randyshee/TensorFlow-Projects" + owner: "randyshee" + framework: FRAMEWORK_TENSORFLOW + } } video: { video_id: "UxJNG7ENRNg" video_title: "PR-061: Understanding Deep Learning Requires Rethinking Generalization" number_of_likes: 50 - number_of_views: 3260 + number_of_views: 3306 published_date: { seconds: 1516543607 } @@ -6027,7 +6051,7 @@ pr_id_to_video: { video_id: "6hg5d10SZr0" video_title: "PR-062: Deep Learning: A Critical Appraisal (2018)" number_of_likes: 57 - number_of_views: 3628 + number_of_views: 3645 published_date: { seconds: 1517147263 } @@ -6070,7 +6094,7 @@ pr_id_to_video: { video_id: "ZO4bXgdcCQA" video_title: "PR-063 Peephole: Predicting Network Performance Before Training" number_of_likes: 5 - number_of_views: 779 + number_of_views: 783 published_date: { seconds: 1517147277 } @@ -6106,73 +6130,78 @@ pr_id_to_video: { authors: "Vihan Jain" authors: "Xiaobing Liu" authors: "Hemal Shah" - repositories: { - url: "https://github.com/shenweichen/DeepCTR" - owner: "shenweichen" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5130 - description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." - } repositories: { url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/rank/wide_deep" owner: "rank" framework: FRAMEWORK_OTHERS - number_of_stars: 556 + number_of_stars: 594 description: "大规模推荐模型训练工具" } repositories: { - url: "https://github.com/fengtong-xiao/DMBGN" - owner: "fengtong-xiao" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "The implementation of the submitted paper \"Deep Multi-Behaviors Graph Network for Voucher Redemption Rate Prediction\" in SIGKDD 2021 Applied Data Science Track." 
+ url: "https://github.com/deepakshankar94/Exploring-DL-based-Recommendation-Systems" + owner: "deepakshankar94" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 } repositories: { - url: "https://github.com/aivolcano/RecSys_tf2" - owner: "aivolcano" + url: "https://github.com/pollyyu/Final_Project_MachineLearning_in_TensorFlow_Berkeley" + owner: "pollyyu" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + number_of_stars: 2 + description: "This is the final project of Machine Learning Course in TensorFlow for Berkeley extension class" } repositories: { - url: "https://github.com/NVIDIA/HugeCTR" - owner: "NVIDIA" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 444 - description: "HugeCTR is a high efficiency GPU framework designed for Click-Through-Rate (CTR) estimating training" + url: "https://github.com/qmonmous/pySpark-X-RecommenderSys--intro" + owner: "qmonmous" + framework: FRAMEWORK_OTHERS + description: "🇬🇧 Introduction to pySpark by building a very simple recommender system." } repositories: { - url: "https://github.com/jsleroux/Recommender-Systems" - owner: "jsleroux" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/qmonmous/BigData-X-Python" + owner: "qmonmous" + framework: FRAMEWORK_OTHERS + description: "🇬🇧 Introduction to pySpark by building a very simple recommender system." } repositories: { - url: "https://github.com/codlife/NLP" - owner: "codlife" + url: "https://github.com/deepakshankar94/Movie-Recommendation-System" + owner: "deepakshankar94" framework: FRAMEWORK_OTHERS + number_of_stars: 6 + description: "Movie recommendation system built with factorization machines and deep learning" } repositories: { - url: "https://github.com/vinaymittal22/Income_Prediction_US" - owner: "vinaymittal22" + url: "https://github.com/floraxhuang/Movie-Recommendation-System" + owner: "floraxhuang" framework: FRAMEWORK_OTHERS - description: "Adult data set solve for predict income of US population" + number_of_stars: 6 + description: "Movie recommendation system built with factorization machines and deep learning" } repositories: { - url: "https://github.com/yil479/yelp_review" - owner: "yil479" - framework: FRAMEWORK_OTHERS + url: "https://github.com/GitHub-HongweiZhang/prediction-flow" + owner: "GitHub-HongweiZhang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 173 + description: "Deep-Learning based CTR models implemented by PyTorch" } repositories: { - url: "https://github.com/sandeepnair2812/Deep-Learning-Based-Search-and-Recommendation-System" - owner: "sandeepnair2812" + url: "https://github.com/alsoj/Recommenders-movielens" + owner: "alsoj" framework: FRAMEWORK_TENSORFLOW + description: "Recommenders-movielens" + } + repositories: { + url: "https://github.com/aj9011/Wide-and-Deep-Demand-Forecasting" + owner: "aj9011" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "SK Telesis Project" } } video: { video_id: "hKoJPqWLrI4" video_title: "PR-064: Wide&Deep Learning for Recommender Systems" - number_of_likes: 31 - number_of_views: 2149 + number_of_likes: 32 + number_of_views: 2183 published_date: { seconds: 1517749978 } @@ -6198,36 +6227,12 @@ pr_id_to_video: { authors: "Andrew Tao" authors: "Jan Kautz" authors: "Bryan Catanzaro" - repositories: { - url: "https://github.com/JeongHyunJin/Pix2PixHD" - owner: "JeongHyunJin" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - } - repositories: { - url: "https://github.com/ubc-vision/DwNet" - owner: "ubc-vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - 
repositories: { - url: "https://github.com/haru-256/pix2pixHD.pytorch" - owner: "haru-256" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/SeniorDev009/ONNX-project" - owner: "SeniorDev009" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } repositories: { is_official: true url: "https://github.com/NVIDIA/pix2pixHD" owner: "NVIDIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 5347 + number_of_stars: 5395 description: "Synthesizing and manipulating 2048x1024 images with conditional GANs" } repositories: { @@ -6244,7 +6249,7 @@ pr_id_to_video: { url: "https://github.com/mingyuliutw/UNIT" owner: "mingyuliutw" framework: FRAMEWORK_PYTORCH - number_of_stars: 1762 + number_of_stars: 1771 description: "Unsupervised Image-to-Image Translation" } repositories: { @@ -6258,12 +6263,37 @@ pr_id_to_video: { owner: "LiuNull" framework: FRAMEWORK_PYTORCH } + repositories: { + url: "https://github.com/SeniorDev009/ONNX-project" + owner: "SeniorDev009" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + url: "https://github.com/agermanidis/pix2pixHD-runway" + owner: "agermanidis" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + } + repositories: { + url: "https://github.com/haru-256/pix2pixHD.pytorch" + owner: "haru-256" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/moabarar/nemar" + owner: "moabarar" + framework: FRAMEWORK_PYTORCH + number_of_stars: 77 + description: "[CVPR2020] Unsupervised Multi-Modal Image Registration via Geometry Preserving Image-to-Image Translation" + } } video: { video_id: "_5ofbwltEKU" video_title: "PR-065 : High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs" number_of_likes: 16 - number_of_views: 1911 + number_of_views: 1922 published_date: { seconds: 1517753318 } @@ -6306,8 +6336,8 @@ pr_id_to_video: { video: { video_id: "jFpO-E4RPhQ" video_title: "PR-066: Don't decay the learning rate, increase the batch size" - number_of_likes: 19 - number_of_views: 2372 + number_of_likes: 20 + number_of_views: 2391 published_date: { seconds: 1518357854 } @@ -6330,18 +6360,11 @@ pr_id_to_video: { authors: "Volodymyr Kuleshov" authors: "S. 
Zayd Enam" authors: "Stefano Ermon" - repositories: { - url: "https://github.com/johnathanchiu/audio-upsampling" - owner: "johnathanchiu" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Uses CNN to upsample low-res audio files" - } repositories: { url: "https://github.com/kuleshov/audio-super-res" owner: "kuleshov" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 557 + number_of_stars: 574 description: "Audio super resolution using neural networks" } repositories: { @@ -6357,12 +6380,19 @@ pr_id_to_video: { number_of_stars: 4 description: "Audio Super-Resolution performed on VCTK corpus" } + repositories: { + url: "https://github.com/johnathanchiu/audio-upsampling" + owner: "johnathanchiu" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Uses CNN to upsample low-res audio files" + } } video: { video_id: "iqN08EPMjSs" video_title: "PR-067: Audio Super Resolution using Neural Nets" number_of_likes: 21 - number_of_views: 3391 + number_of_views: 3426 published_date: { seconds: 1518357824 } @@ -6386,15 +6416,42 @@ pr_id_to_video: { authors: "Valentin Flunkert" authors: "Jan Gasthaus" repositories: { - url: "https://github.com/kshmawj111/solar_energy_forecast" - owner: "kshmawj111" + url: "https://github.com/nuankw/Summer-Research-2018-Part-One" + owner: "nuankw" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "PART I DeepAR implementation based on paper: https://arxiv.org/pdf/1704.04110.pdf" + } + repositories: { + url: "https://github.com/Yonder-OSS/D3M-Primitives" + owner: "Yonder-OSS" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/husnejahan/DeepAR-pytorch" + owner: "husnejahan" + framework: FRAMEWORK_PYTORCH + number_of_stars: 50 + } + repositories: { + url: "https://github.com/skp2/Electricity-Load" + owner: "skp2" framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Predict Electricity load from historical time series" + } + repositories: { + url: "https://github.com/zhykoties/DeepAR" + owner: "zhykoties" + framework: FRAMEWORK_PYTORCH + number_of_stars: 169 + description: "Implementation of deep learning models for time series in PyTorch." } repositories: { url: "https://github.com/jdb78/pytorch-forecasting" owner: "jdb78" framework: FRAMEWORK_PYTORCH - number_of_stars: 1199 + number_of_stars: 1264 description: "Time series forecasting with PyTorch" } repositories: { @@ -6405,54 +6462,29 @@ pr_id_to_video: { description: "use deepar to predict water supply network pressure " } repositories: { - url: "https://github.com/ensembles4612/product_demand_forecast_using_DeepAR_Amazon_SageMaker" - owner: "ensembles4612" - framework: FRAMEWORK_OTHERS - description: "I built a forecast tool using DeepAR (autoregressive RNN with LSTM cells) in Sagemaker that can predict the demand of hundreds of products simultaneously." - } - repositories: { - url: "https://github.com/skp2/Electricity-Load" - owner: "skp2" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Predict Electricity load from historical time series" + url: "https://github.com/NewKnowledge/TimeSeries-D3M-Wrappers" + owner: "NewKnowledge" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 } repositories: { - url: "https://github.com/Yonder-OSS/D3M-Primitives" - owner: "Yonder-OSS" + url: "https://github.com/ucl-exoplanets/deepARTransit" + owner: "ucl-exoplanets" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "A library for interpolating and detrending transit light curves with LSTMs." 
} repositories: { url: "https://github.com/Timbasa/Sample_GluonTS" owner: "Timbasa" framework: FRAMEWORK_OTHERS } - repositories: { - url: "https://github.com/nuankw/Summer-Research-2018-Part-One" - owner: "nuankw" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 - description: "PART I DeepAR implementation based on paper: https://arxiv.org/pdf/1704.04110.pdf" - } - repositories: { - url: "https://github.com/husnejahan/DeepAR-pytorch" - owner: "husnejahan" - framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - } - repositories: { - url: "https://github.com/zhykoties/DeepAR" - owner: "zhykoties" - framework: FRAMEWORK_PYTORCH - number_of_stars: 163 - description: "Implementation of deep learning models for time series in PyTorch." - } } video: { video_id: "okyo61ZZivA" video_title: "PR-068: DeepAR: Probabilistic Forecasting with Autoregressive Recurrent Networks" - number_of_likes: 23 - number_of_views: 3988 + number_of_likes: 27 + number_of_views: 4053 published_date: { seconds: 1519565309 } @@ -6478,70 +6510,71 @@ pr_id_to_video: { authors: "Quoc V. Le" authors: "Jeff Dean" repositories: { - url: "https://github.com/distrue/enas_tensorflow" - owner: "distrue" + url: "https://github.com/melodyguan/enas" + owner: "melodyguan" framework: FRAMEWORK_TENSORFLOW - description: "Implementation of Multi-Objective reward based on ENAS backbone" + number_of_stars: 1522 + description: "TensorFlow Code for paper \"Efficient Neural Architecture Search via Parameter Sharing\"" } repositories: { - url: "https://github.com/guoyongcs/NATv2" - owner: "guoyongcs" - framework: FRAMEWORK_PYTORCH - number_of_stars: 21 - description: "Implementation for NATv2." + url: "https://github.com/cshannonn/blackscholes_nas" + owner: "cshannonn" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "Can a neural network learn Black Scholes, yes..." 
} repositories: { - url: "https://github.com/f51980280/ENAS-Implement" - owner: "f51980280" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "DeepLearning Systems and Inference Realization" + url: "https://github.com/ahundt/enas" + owner: "ahundt" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 12 + description: "TensorFlow code for paper \"Training Frankenstein's Creature to Stack: HyperTree Architecture Search\"" } repositories: { - url: "https://github.com/nikitati/Nas.jl" - owner: "nikitati" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Programmable Neural Architecture Search" + url: "https://github.com/ahundt/renas" + owner: "ahundt" + framework: FRAMEWORK_TENSORFLOW + description: "TensorFlow code for paper \"Training Frankenstein's Creature to Stack: HyperTree Architecture Search\"" } repositories: { - url: "https://github.com/invisibleForce/ENAS-Pytorch" - owner: "invisibleForce" + url: "https://github.com/carpedm20/ENAS-pytorch" + owner: "carpedm20" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "a pytorch implementation of ENAS " + number_of_stars: 2459 + description: "PyTorch implementation of \"Efficient Neural Architecture Search via Parameters Sharing\"" } repositories: { - url: "https://github.com/senthilva/Keras_functional_API_CNN" - owner: "senthilva" - framework: FRAMEWORK_OTHERS + url: "https://github.com/aymanshams07/enas_cifar10" + owner: "aymanshams07" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/WillButAgain/ENAS" - owner: "WillButAgain" - framework: FRAMEWORK_PYTORCH - description: "scratch implementation of \"Efficient Neural Architecture Search via Parameter Sharing\" https://arxiv.org/pdf/1802.03268.pdf" + url: "https://github.com/yashkant/ENAS-Quantized-Neural-Networks" + owner: "yashkant" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Efficient Neural Architecture Search coupled with Quantized CNNs to search for resource efficient and accurate architectures." } repositories: { - url: "https://github.com/melodyguan/enas" - owner: "melodyguan" + url: "https://github.com/Ezereal/enas" + owner: "Ezereal" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1519 - description: "TensorFlow Code for paper \"Efficient Neural Architecture Search via Parameter Sharing\"" + number_of_stars: 1 + description: "fork from https://github.com/melodyguan/enas/" } repositories: { - url: "https://github.com/cshannonn/blackscholes_nas" - owner: "cshannonn" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "Can a neural network learn Black Scholes, yes..." + url: "https://github.com/MINGUKKANG/ENAS-Tensorflow" + owner: "MINGUKKANG" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 106 + description: "Efficient Neural Architecture search via parameter sharing(ENAS) micro search Tensorflow code for windows user" } repositories: { - url: "https://github.com/ahundt/enas" - owner: "ahundt" + url: "https://github.com/countif/enas_nni" + owner: "countif" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 12 - description: "TensorFlow code for paper \"Training Frankenstein's Creature to Stack: HyperTree Architecture Search\"" + number_of_stars: 25 + description: "This code is for running enas on nni. 
" } methods: { name: "Tanh Activation" @@ -6568,7 +6601,7 @@ pr_id_to_video: { video_id: "fbCcJaSQPPA" video_title: "PR-069: Efficient Neural Architecture Search via Parameter Sharing" number_of_likes: 44 - number_of_views: 4343 + number_of_views: 4397 published_date: { seconds: 1520088191 } @@ -6596,7 +6629,7 @@ pr_id_to_video: { video_id: "CtaPFqq8P00" video_title: "PR-070: SafetyNets: Verifiable Execution of Deep Neural Networks on an Untrusted Cloud" number_of_likes: 2 - number_of_views: 450 + number_of_views: 453 published_date: { seconds: 1520171150 } @@ -6620,72 +6653,72 @@ pr_id_to_video: { authors: "Shixiang Gu" authors: "Ben Poole" repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/rebar" - owner: "research" + url: "https://github.com/tensorflow/models" + owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } + repositories: { + url: "https://github.com/GuyLor/direct_vae" + owner: "GuyLor" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12 + description: "Implementation of the paper \"Direct Optimization through argmax for discrete Variational Auto-Encoder\"" + } repositories: { url: "https://github.com/karpathy/deep-vector-quantization" owner: "karpathy" framework: FRAMEWORK_PYTORCH - number_of_stars: 252 + number_of_stars: 268 description: "VQVAEs, GumbelSoftmaxes and friends" } repositories: { - url: "https://github.com/Jmkernes/PAR-Transformer-XL" - owner: "Jmkernes" + url: "https://github.com/crowdflowTUe/stampnet" + owner: "crowdflowTUe" framework: FRAMEWORK_TENSORFLOW number_of_stars: 3 - description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" + description: "code for \"StampNet: unsupervised multi-class object discovery\" by Visser, Corbetta, Menkovski and Toschi (https://arxiv.org/abs/1902.02693)" } repositories: { - url: "https://github.com/EddieCunningham/GraphLSSM" - owner: "EddieCunningham" - framework: FRAMEWORK_OTHERS - number_of_stars: 5 - description: "Graphical Latent State Space Models" + url: "https://github.com/AntixK/PyTorch-VAE" + owner: "AntixK" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2276 + description: "A Collection of Variational Autoencoders (VAE) in PyTorch." } repositories: { - url: "https://github.com/stefanthaler/gumbel-softmax-exploration" - owner: "stefanthaler" - framework: FRAMEWORK_TENSORFLOW - description: "Exploration of the Gumbel Softmax Paper https://arxiv.org/pdf/1611.01144.pdf" + url: "https://github.com/ericjang/gumbel-softmax" + owner: "ericjang" + framework: FRAMEWORK_OTHERS + number_of_stars: 363 + description: "categorical variational autoencoder using the Gumbel-Softmax estimator" } repositories: { - url: "https://github.com/kampta/pytorch-distributions" - owner: "kampta" + url: "https://github.com/shaabhishek/gumbel-softmax-pytorch" + owner: "shaabhishek" framework: FRAMEWORK_PYTORCH number_of_stars: 8 - description: "Basic VAE flow using pytorch distributions" + description: "categorical variational autoencoder using the Gumbel-Softmax estimator" } repositories: { - url: "https://github.com/OlivierAlgoet/Tensorflow2-GMM" - owner: "OlivierAlgoet" + url: "https://github.com/stefanthaler/dl-papers-imlemented" + owner: "stefanthaler" framework: FRAMEWORK_TENSORFLOW - description: "Gaussian mixture model" + description: "Implementations of deep learning papers. 
" } repositories: { - url: "https://github.com/tensorflow/models" - owner: "tensorflow" + url: "https://github.com/Jmkernes/PAR-Transformer-XL" + owner: "Jmkernes" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70579 - description: "Models and examples built with TensorFlow" - } - repositories: { - url: "https://github.com/GuyLor/direct_vae" - owner: "GuyLor" - framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Implementation of the paper \"Direct Optimization through argmax for discrete Variational Auto-Encoder\"" + number_of_stars: 3 + description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" } repositories: { - url: "https://github.com/crowdflowTUe/stampnet" - owner: "crowdflowTUe" + url: "https://github.com/OlivierAlgoet/Tensorflow2-GMM" + owner: "OlivierAlgoet" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "code for \"StampNet: unsupervised multi-class object discovery\" by Visser, Corbetta, Menkovski and Toschi (https://arxiv.org/abs/1902.02693)" + description: "Gaussian mixture model" } methods: { name: "Gumbel Softmax" @@ -6696,8 +6729,8 @@ pr_id_to_video: { video: { video_id: "ty3SciyoIyk" video_title: "PR-071: Categorical Reparameterization with Gumbel Softmax" - number_of_likes: 42 - number_of_views: 4334 + number_of_likes: 43 + number_of_views: 4452 published_date: { seconds: 1520172922 } @@ -6724,7 +6757,7 @@ pr_id_to_video: { url: "https://github.com/songhan/Deep-Compression-AlexNet" owner: "songhan" framework: FRAMEWORK_OTHERS - number_of_stars: 573 + number_of_stars: 580 description: "Deep Compression on AlexNet" } repositories: { @@ -6839,8 +6872,8 @@ pr_id_to_video: { video: { video_id: "9mFZmpIbMDs" video_title: "PR-072: Deep Compression" - number_of_likes: 27 - number_of_views: 2200 + number_of_likes: 28 + number_of_views: 2230 published_date: { seconds: 1520777304 } @@ -6867,8 +6900,8 @@ pr_id_to_video: { video: { video_id: "U8IpNf1b57w" video_title: "PR-073: Generative Semantic Manipulation with Contrasting GAN" - number_of_likes: 4 - number_of_views: 757 + number_of_likes: 3 + number_of_views: 762 published_date: { seconds: 1520778031 } @@ -6897,7 +6930,7 @@ pr_id_to_video: { url: "https://github.com/ung200/thats-what-obama-said" owner: "ung200" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 19 + number_of_stars: 20 } methods: { name: "ReLU" @@ -6953,7 +6986,7 @@ pr_id_to_video: { video: { video_id: "A1o6SUsWd98" video_title: "PR-074: ObamaNet: Photo-realistic lip-sync from text" - number_of_views: 2024 + number_of_views: 2050 published_date: { seconds: 1521381942 } @@ -6977,48 +7010,6 @@ pr_id_to_video: { authors: "Geoff Pleiss" authors: "Yu Sun" authors: "Kilian Q. 
Weinberger" - repositories: { - url: "https://github.com/hollance/reliability-diagrams" - owner: "hollance" - framework: FRAMEWORK_PYTORCH - number_of_stars: 30 - description: "Reliability diagrams visualize whether a classifier model needs calibration" - } - repositories: { - url: "https://github.com/sleep3r/garrus" - owner: "sleep3r" - framework: FRAMEWORK_OTHERS - number_of_stars: 13 - description: "Python framework for high quality confidence estimation of deep neural networks, providing methods such as confidence calibration and ordinal ranking" - } - repositories: { - url: "https://github.com/bayesgroup/pytorch-ensembles" - owner: "bayesgroup" - framework: FRAMEWORK_PYTORCH - number_of_stars: 142 - description: "Pitfalls of In-Domain Uncertainty Estimation and Ensembling in Deep Learning, ICLR 2020" - } - repositories: { - url: "https://github.com/artnitolog/diary" - owner: "artnitolog" - framework: FRAMEWORK_OTHERS - description: "Accompanying repository for the 3rd year corsework. CMC MSU, MMF, 2020-2021." - } - repositories: { - url: "https://github.com/johntd54/stanford_car" - owner: "johntd54" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Classification model for fine-grained visual classification on the Stanford Car dataset." - } - repositories: { - is_official: true - url: "https://github.com/gpleiss/temperature_scaling" - owner: "gpleiss" - framework: FRAMEWORK_PYTORCH - number_of_stars: 565 - description: "A simple way to calibrate your neural network." - } repositories: { url: "https://github.com/AnanyaKumar/verified_calibration" owner: "AnanyaKumar" @@ -7035,7 +7026,7 @@ pr_id_to_video: { url: "https://github.com/Jonathan-Pearce/calibration_library" owner: "Jonathan-Pearce" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 + number_of_stars: 9 description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" } repositories: { @@ -7045,12 +7036,51 @@ pr_id_to_video: { number_of_stars: 32 description: "Code for the 2018 EMNLP Interpretability Workshop Paper \"Interpreting Neural Networks with Nearest Neighbors\"" } + repositories: { + url: "https://github.com/Jonathan-Pearce/cnn_calibration" + owner: "Jonathan-Pearce" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" + } + repositories: { + url: "https://github.com/cpark321/uncertainty-deep-learning" + owner: "cpark321" + framework: FRAMEWORK_PYTORCH + number_of_stars: 98 + } + repositories: { + url: "https://github.com/cpark321/bayesian-neural-networks" + owner: "cpark321" + framework: FRAMEWORK_PYTORCH + number_of_stars: 98 + } + repositories: { + url: "https://github.com/aigen/df-posthoc-calibration" + owner: "aigen" + framework: FRAMEWORK_OTHERS + number_of_stars: 6 + description: "Model-agnostic posthoc calibration without distributional assumptions" + } + repositories: { + url: "https://github.com/sirius8050/Expected-Calibration-Error" + owner: "sirius8050" + framework: FRAMEWORK_OTHERS + number_of_stars: 6 + } + repositories: { + url: "https://github.com/ondrejba/tf_calibrate" + owner: "ondrejba" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Calibration of neural networks in Tensorflow." 
+ } } video: { video_id: "odNHEkfJAc4" video_title: "PR-075: On Calibration of Modern Neural Networks (2017)" - number_of_likes: 28 - number_of_views: 2921 + number_of_likes: 29 + number_of_views: 2976 published_date: { seconds: 1521987100 } @@ -7072,28 +7102,6 @@ pr_id_to_video: { } authors: "Quoc V. Le" authors: "Tomas Mikolov" - repositories: { - url: "https://github.com/Antonildo43/Classifica-o-de-textos-com-doc2Vec" - owner: "Antonildo43" - framework: FRAMEWORK_OTHERS - description: "Classificação de Documentos com doc2Vec" - } - repositories: { - url: "https://github.com/jimmy6727/Informd" - owner: "jimmy6727" - framework: FRAMEWORK_TENSORFLOW - description: "Project repo for Mozilla Spring Incubator Lab 2020 Project " - } - repositories: { - url: "https://github.com/wiflore/IBM_Articles_Recomender" - owner: "wiflore" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/hithisisdhara/doc2vec" - owner: "hithisisdhara" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/kr900910/supreme_court_opinion" owner: "kr900910" @@ -7115,7 +7123,7 @@ pr_id_to_video: { url: "https://github.com/ibrahimsharaf/doc2vec" owner: "ibrahimsharaf" framework: FRAMEWORK_OTHERS - number_of_stars: 92 + number_of_stars: 93 description: ":notebook: Long(er) text representation and classification using Doc2Vec embeddings" } repositories: { @@ -7131,12 +7139,38 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 } + repositories: { + url: "https://github.com/DCYN/Ramdomized-Clinical-Trail-Classification" + owner: "DCYN" + framework: FRAMEWORK_TENSORFLOW + description: "Applying deeplearning + svm classifier to get randomized clinical trails" + } + repositories: { + url: "https://github.com/kramamur/sentiment-analysis" + owner: "kramamur" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Positive or negative sentiment analysis from text" + } + repositories: { + url: "https://github.com/Antonildo43/Classifica-o-de-textos-com-doc2Vec" + owner: "Antonildo43" + framework: FRAMEWORK_OTHERS + description: "Classificação de Documentos com doc2Vec" + } + repositories: { + url: "https://github.com/fabiocorreacordeiro/Elsevier_abstracts-Classification" + owner: "fabiocorreacordeiro" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "O objetivo desse trabalho foi treinar um algoritmo de classificação que pudesse identificar se um texto científico foi elaborado por uma empresa do setor de Óleo & Gás. " + } } video: { video_id: "NxKpgY6sWOQ" video_title: "PR-076: Distributed Representations of Sentences and Documents" number_of_likes: 18 - number_of_views: 1758 + number_of_views: 1773 published_date: { seconds: 1522587607 } @@ -7161,66 +7195,74 @@ pr_id_to_video: { authors: "Jun Wang" authors: "Yong Yu" repositories: { - url: "https://github.com/project-basileus/multitype-sequence-generation-by-tlstm-gan" - owner: "project-basileus" - framework: FRAMEWORK_TENSORFLOW - description: "Training GANs to generate multi-type sequential data (both numeric and categorical), using Time-LSTM and GAN." + url: "https://github.com/suhoy901/SeqGAN" + owner: "suhoy901" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "pytorch SeqGAN" } repositories: { - url: "https://github.com/lina2360/HiSeqGan" - owner: "lina2360" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/bgenchel/MusicalSeqGAN" + owner: "bgenchel" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Adapt and evaluate SeqGAN for music. 
Developed in PyTorch, using https://github.com/ZiJianZhao/SeqGAN-PyTorch as a base" } repositories: { - url: "https://github.com/willspag/SeqGan" - owner: "willspag" + url: "https://github.com/AWLyrics/SeqGAN_Poem" + owner: "AWLyrics" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Attempt at Tensorflow 2.3 version of Sequence Gan" + number_of_stars: 7 + description: "SeqGAN for poem generation, can be modified to fit the lyrics genertion problem" } repositories: { - url: "https://github.com/desire2020/RankGAN" - owner: "desire2020" + url: "https://github.com/L0SG/seqgan-music" + owner: "L0SG" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 24 - description: "Implementation of Adversarial Ranking for Language Generation [ArxiV 1705.11001]" + number_of_stars: 20 + description: "Implementation of a paper \"Polyphonic Music Generation with Sequence Generative Adversarial Networks\" in TensorFlow" } repositories: { - url: "https://github.com/medtray/SeqGAN-vs-MLE-vs-PG-BLEU-vs-ScheduleSampling" - owner: "medtray" + url: "https://github.com/TobeyYang/S2S_Temp" + owner: "TobeyYang" framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "Code for EMNLP2019 paper \"Low-Resource Response Generation with Template Prior\"" } repositories: { - url: "https://github.com/chaneeh/SeqGAN_experiment" - owner: "chaneeh" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/LiangqunLu/DLForChatbot" - owner: "LiangqunLu" + url: "https://github.com/chung771026/Implement-seqGAN-with-Keras" + owner: "chung771026" framework: FRAMEWORK_OTHERS - description: "Deep learning for chatbot" + number_of_stars: 5 + description: "Implement seqGAN (https://arxiv.org/abs/1609.05473) with Keras. The task is a Chinese chat bot." } repositories: { - url: "https://github.com/yuanfeisiyuetian/seqgan-modbusTCP" - owner: "yuanfeisiyuetian" - framework: FRAMEWORK_TENSORFLOW - description: "使用seqgan进行ModbusTCP协议的模糊测试" + url: "https://github.com/bgenchel/Reinforcement-Learning-for-Music-Generation" + owner: "bgenchel" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "My final project for the M.S. Music Technology program at Georgia Tech (GTCMT). 
Deep Reinforcement Learning for Symbolic Music Generation" } repositories: { - is_official: true - url: "https://github.com/LantaoYu/SeqGAN" - owner: "LantaoYu" + url: "https://github.com/GuyTevet/SeqGAN-eval" + owner: "GuyTevet" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1919 + number_of_stars: 5 description: "Implementation of Sequence Generative Adversarial Nets with Policy Gradient" } repositories: { - url: "https://github.com/suhoy901/SeqGAN" - owner: "suhoy901" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/rupes438/CodeGen" + owner: "rupes438" + framework: FRAMEWORK_OTHERS number_of_stars: 2 - description: "pytorch SeqGAN" + description: "Generate Codes using Sequence GAN" + } + repositories: { + url: "https://github.com/TalkToTheGAN/REGAN" + owner: "TalkToTheGAN" + framework: FRAMEWORK_PYTORCH + number_of_stars: 34 + description: "ReGAN: Sequence GAN using RE[INFORCE|LAX|BAR] based PG estimators " } methods: { name: "GAN" @@ -7237,7 +7279,7 @@ pr_id_to_video: { video_id: "BXODIP3QjJI" video_title: "PR-077: SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient" number_of_likes: 16 - number_of_views: 2096 + number_of_views: 2112 published_date: { seconds: 1523239176 } @@ -7260,12 +7302,6 @@ pr_id_to_video: { authors: "Tianqi Chen" authors: "Ian Goodfellow" authors: "Jonathon Shlens" - repositories: { - url: "https://github.com/hxtruong/net2net" - owner: "hxtruong" - framework: FRAMEWORK_TENSORFLOW - description: "Library to increasing size of model. Wider and Deeper any layer of model." - } repositories: { url: "https://github.com/agongt408/vbranch" owner: "agongt408" @@ -7277,15 +7313,21 @@ pr_id_to_video: { url: "https://github.com/soumith/net2net.torch" owner: "soumith" framework: FRAMEWORK_OTHERS - number_of_stars: 153 + number_of_stars: 154 description: "Implementation of http://arxiv.org/abs/1511.05641 that lets one build a larger net starting from a smaller one." } + repositories: { + url: "https://github.com/hxtruong/net2net" + owner: "hxtruong" + framework: FRAMEWORK_TENSORFLOW + description: "Library to increasing size of model. Wider and Deeper any layer of model." 
+ } } video: { video_id: "btsZOMsyH_o" video_title: "PR-078: Net2Net: Accelerating Learning via Knowledge Transfer" number_of_likes: 14 - number_of_views: 1010 + number_of_views: 1021 published_date: { seconds: 1523878774 } @@ -7308,37 +7350,6 @@ pr_id_to_video: { authors: "Chris Donahue" authors: "Julian McAuley" authors: "Miller Puckette" - repositories: { - url: "https://github.com/adrienchaton/BERGAN" - owner: "adrienchaton" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/zassou65535/WaveGAN" - owner: "zassou65535" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "WaveGANによる音声生成器" - } - repositories: { - url: "https://github.com/mahotani/ADVERSARIAL-AUDIO-SYNTHESIS" - owner: "mahotani" - framework: FRAMEWORK_OTHERS - description: "ICLR2019で採択されたADVERSARIAL AUDIO SYNTHESISを読んだメモ的なもの" - } - repositories: { - url: "https://github.com/MaxHolmberg96/WaveGAN" - owner: "MaxHolmberg96" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Implementation of the paper https://arxiv.org/pdf/1802.04208.pdf" - } - repositories: { - url: "https://github.com/Yotsuyubi/wave-nr-gan" - owner: "Yotsuyubi" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/fromme0528/pytorch-WaveGAN" owner: "fromme0528" @@ -7370,6 +7381,40 @@ pr_id_to_video: { owner: "ShaunBarry" framework: FRAMEWORK_TENSORFLOW } + repositories: { + url: "https://github.com/mostafaelaraby/wavegan-pytorch" + owner: "mostafaelaraby" + framework: FRAMEWORK_PYTORCH + number_of_stars: 45 + description: "Pytorch Implementation of wavegan model to generate audio " + } + repositories: { + is_official: true + url: "https://github.com/chrisdonahue/wavegan" + owner: "chrisdonahue" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1031 + description: "WaveGAN: Learn to synthesize raw audio with generative adversarial networks" + } + repositories: { + url: "https://github.com/cristiprg/wavegan-fork" + owner: "cristiprg" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/acheketa/cwavegan" + owner: "acheketa" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 119 + description: "Conditional WaveGAN: Generating audio samples conditioned on class labels" + } + repositories: { + url: "https://github.com/csiki/v2a" + owner: "csiki" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Autoencoding visual-to-auditory sensory substitution" + } methods: { name: "Leaky ReLU" full_name: "Leaky ReLU" @@ -7425,7 +7470,7 @@ pr_id_to_video: { video_id: "UXVKSSXdwb8" video_title: "PR-079: Synthesizing Audio with Generative Adversarial Networks" number_of_likes: 21 - number_of_views: 1311 + number_of_views: 1333 published_date: { seconds: 1523206394 } @@ -7448,13 +7493,6 @@ pr_id_to_video: { authors: "Jasper Snoek" authors: "Hugo Larochelle" authors: "Ryan P. Adams" - repositories: { - url: "https://github.com/c-bata/goptuna" - owner: "c-bata" - framework: FRAMEWORK_OTHERS - number_of_stars: 184 - description: "A hyperparameter optimization framework, inspired by Optuna." 
- } repositories: { url: "https://github.com/JasperSnoek/spearmint" owner: "JasperSnoek" @@ -7466,7 +7504,7 @@ pr_id_to_video: { url: "https://github.com/HIPS/Spearmint" owner: "HIPS" framework: FRAMEWORK_OTHERS - number_of_stars: 1435 + number_of_stars: 1444 description: "Spearmint Bayesian optimization codebase" } repositories: { @@ -7476,6 +7514,13 @@ pr_id_to_video: { number_of_stars: 1 description: "This is an introduction for Bayesian optimization" } + repositories: { + url: "https://github.com/c-bata/goptuna" + owner: "c-bata" + framework: FRAMEWORK_OTHERS + number_of_stars: 185 + description: "A hyperparameter optimization framework, inspired by Optuna." + } methods: { name: "Gaussian Process" full_name: "Gaussian Process" @@ -7486,7 +7531,7 @@ pr_id_to_video: { video_id: "MnHCe8tGjQ8" video_title: "PR-080: Practical Bayesian Optimization of Machine Learning Algorithms" number_of_likes: 27 - number_of_views: 2391 + number_of_views: 2435 published_date: { seconds: 1523799259 } @@ -7534,7 +7579,7 @@ pr_id_to_video: { video_id: "OgNSFKeHy8k" video_title: "PR-082: Introduction to Speech Separation" number_of_likes: 15 - number_of_views: 1216 + number_of_views: 1222 published_date: { seconds: 1524410583 } @@ -7558,43 +7603,6 @@ pr_id_to_video: { authors: "Ross Girshick" authors: "Abhinav Gupta" authors: "Kaiming He" - repositories: { - url: "https://github.com/open-mmlab/mmaction2" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1016 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" - } - repositories: { - url: "https://github.com/jordiae/DeepLearning-MAI" - owner: "jordiae" - framework: FRAMEWORK_PYTORCH - description: "Code for the Deep Learning course (Master in Artificial Intelligence at UPC)" - } - repositories: { - url: "https://github.com/LRacoci/permutation-graphml" - owner: "LRacoci" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/rijuldhir/TSM" - owner: "rijuldhir" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/JiaPeng1234/MRI-Segmentation-Transformer" - owner: "JiaPeng1234" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - } - repositories: { - is_official: true - url: "https://github.com/facebookresearch/video-nonlocal-net" - owner: "facebookresearch" - framework: FRAMEWORK_OTHERS - number_of_stars: 1795 - description: "Non-local Neural Networks for Video Classification" - } repositories: { url: "https://github.com/jiajunhua/facebookresearch-Detectron" owner: "jiajunhua" @@ -7607,11 +7615,18 @@ pr_id_to_video: { number_of_stars: 8 description: "TensorFlow implementation of Non-local Neural Network " } + repositories: { + url: "https://github.com/open-mmlab/mmaction2" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1115 + description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + } repositories: { url: "https://github.com/facebookresearch/detectron" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 24504 + number_of_stars: 24562 description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." 
} repositories: { @@ -7621,6 +7636,39 @@ pr_id_to_video: { number_of_stars: 13 description: "Inception-I3D, Non Local finetune, hmdb51_flow" } + repositories: { + url: "https://github.com/changliu816/CV-paper-review" + owner: "changliu816" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/vanoracai/Exploiting-Spatial-temporal-Relationships-for-3D-Pose-Estimation-via-Graph-Convolutional-Networks" + owner: "vanoracai" + framework: FRAMEWORK_PYTORCH + number_of_stars: 67 + description: "code for ICCV 2019 Paper Exploiting Spatial-temporal Relationships for 3D Pose Estimation via Graph Convolutional Networks " + } + repositories: { + url: "https://github.com/MIT-HAN-LAB/temporal-shift-module" + owner: "MIT-HAN-LAB" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1447 + description: "[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding" + } + repositories: { + url: "https://github.com/WavesUR/embedded_TSM" + owner: "WavesUR" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "cs231n project" + } + repositories: { + url: "https://github.com/cmsflash/efficient-attention" + owner: "cmsflash" + framework: FRAMEWORK_PYTORCH + number_of_stars: 105 + description: "An implementation of the efficient attention module." + } methods: { name: "Convolution" full_name: "Convolution" @@ -7675,8 +7723,8 @@ pr_id_to_video: { video: { video_id: "ZM153wo3baA" video_title: "PR-083: Non-local Neural Networks" - number_of_likes: 46 - number_of_views: 5018 + number_of_likes: 47 + number_of_views: 5201 published_date: { seconds: 1525008094 } @@ -7708,7 +7756,7 @@ pr_id_to_video: { url: "https://github.com/CSAILVision/semantic-segmentation-pytorch" owner: "CSAILVision" framework: FRAMEWORK_PYTORCH - number_of_stars: 3961 + number_of_stars: 4011 description: "Pytorch implementation for Semantic Segmentation/Scene Parsing on MIT ADE20K dataset" } repositories: { @@ -7726,21 +7774,21 @@ pr_id_to_video: { url: "https://github.com/chenyilun95/tf-cpn" owner: "chenyilun95" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 759 + number_of_stars: 760 description: "Cascaded Pyramid Network for Multi-Person Pose Estimation (CVPR 2018)" } repositories: { url: "https://github.com/vacancy/Synchronized-BatchNorm-PyTorch" owner: "vacancy" framework: FRAMEWORK_PYTORCH - number_of_stars: 1307 + number_of_stars: 1313 description: "Synchronized Batch Normalization implementation in PyTorch." 
} repositories: { url: "https://github.com/chrisway613/Synchronized-BatchNormalization" owner: "chrisway613" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 11 description: "Multi-Gpus Synchronized Batch Normalization implementation in PyTorch" } methods: { @@ -7793,7 +7841,7 @@ pr_id_to_video: { video_id: "nkYFEoKQBH0" video_title: "PR-084 MegDet: A Large Mini-Batch Object Detector (CVPR2018)" number_of_likes: 4 - number_of_views: 1226 + number_of_views: 1228 published_date: { seconds: 1525663256 } @@ -7893,7 +7941,7 @@ pr_id_to_video: { video_id: "7WhWkhFAIO4" video_title: "PR-085: In-Datacenter Performance Analysis of a Tensor Processing Unit" number_of_likes: 22 - number_of_views: 1610 + number_of_views: 1647 published_date: { seconds: 1526140508 } @@ -7915,26 +7963,26 @@ pr_id_to_video: { } authors: "Guy Hacohen" authors: "Daphna Weinshall" - repositories: { - url: "https://github.com/josephch405/curriculum-nmt" - owner: "josephch405" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - } repositories: { is_official: true url: "https://github.com/GuyHacohen/curriculum_learning" owner: "GuyHacohen" framework: FRAMEWORK_OTHERS - number_of_stars: 50 + number_of_stars: 51 description: "Code implementing the experiments described in the paper \"On The Power of Curriculum Learning in Training Deep Networks\" by Hacohen & Weinshall (ICML 2019)" } + repositories: { + url: "https://github.com/josephch405/curriculum-nmt" + owner: "josephch405" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + } } video: { video_id: "fQtuWEuwXrA" video_title: "PR-086: Curriculum Learning" number_of_likes: 7 - number_of_views: 1391 + number_of_views: 1405 published_date: { seconds: 1526221428 } @@ -7959,121 +8007,125 @@ pr_id_to_video: { authors: "Masanori Koyama" authors: "Yuichi Yoshida" repositories: { - url: "https://github.com/adbobes/VideoSuperResolution" - owner: "adbobes" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + is_official: true + url: "https://github.com/pfnet-research/sngan_projection" + owner: "pfnet-research" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 968 + description: "GANs with spectral normalization and projection discriminator" } repositories: { - url: "https://github.com/karoly-hars/GAN_image_colorizing" - owner: "karoly-hars" + url: "https://github.com/kklemon/bgan-pytorch" + owner: "kklemon" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Image colorization with generative adversarial networks on the CIFAR10 dataset." + description: "PyTorch implementation of Boundary Seeking GAN for discrete data" } repositories: { - url: "https://github.com/ncuzzy/mygan" - owner: "ncuzzy" + url: "https://github.com/guy-oren/DIRT-OST" + owner: "guy-oren" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/IShengFang/SpectralNormalizationKeras" - owner: "IShengFang" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 104 - description: "Spectral Normalization for Keras Dense and Convolution Layers" + url: "https://github.com/Bingwen-Hu/DRIT" + owner: "Bingwen-Hu" + framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/zhusiling/SAGAN" - owner: "zhusiling" + url: "https://github.com/hinofafa/Self-Attention-HearthStone-GAN" + owner: "hinofafa" framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "This repository provides a PyTorch implementation of SAGAN cited by heykeetae/Self-Attention-GAN. 
This repository provides an efficient method to generate large resolution images and attention weights visualisation using the tensorboard platform. Tensorboard is a robust platform to monitor generated images and learning weights in computer vision learning experiments."
 }
 repositories: {
-    url: "https://github.com/qiaolinhan/ws-preprocess"
-    owner: "qiaolinhan"
-    framework: FRAMEWORK_OTHERS
-    description: "This is image restoration for UAV based wildfire segmentation because it will always meet some disturbance, noise or other serious situation "
+    url: "https://github.com/taki0112/Spectral_Normalization-Tensorflow"
+    owner: "taki0112"
+    framework: FRAMEWORK_TENSORFLOW
+    number_of_stars: 131
+    description: " Simple Tensorflow Implementation of \"Spectral Normalization for Generative Adversarial Networks\" (ICLR 2018)"
 }
 repositories: {
-    is_official: true
-    url: "https://github.com/pfnet-research/sngan_projection"
-    owner: "pfnet-research"
-    framework: FRAMEWORK_TENSORFLOW
-    number_of_stars: 959
-    description: "GANs with spectral normalization and projection discriminator"
+    url: "https://github.com/meg965/pytorch-rl"
+    owner: "meg965"
+    framework: FRAMEWORK_PYTORCH
+    number_of_stars: 1
 }
 repositories: {
-    url: "https://github.com/kklemon/bgan-pytorch"
-    owner: "kklemon"
+    url: "https://github.com/hinofafa/Self-Attention-GAN"
+    owner: "hinofafa"
     framework: FRAMEWORK_PYTORCH
-    description: "PyTorch implementation of Boundary Seeking GAN for discrete data"
+    number_of_stars: 4
+    description: "This repository provides a PyTorch implementation of SAGAN cited by heykeetae/Self-Attention-GAN. This repository provides an efficient method to generate large resolution images and attention weights visualisation using the tensorboard platform. Tensorboard is a robust platform to monitor generated images and learning weights in computer vision learning experiments."
 }
 repositories: {
-    url: "https://github.com/guy-oren/DIRT-OST"
-    owner: "guy-oren"
+    url: "https://github.com/ankitAMD/Self-Attention-GAN-master_pytorch"
+    owner: "ankitAMD"
     framework: FRAMEWORK_PYTORCH
+    number_of_stars: 3
+    description: " Pytorch implementation of Self-Attention Generative Adversarial Networks (SAGAN) of non-cuda users, and it's also used by cuda users."
 }
 repositories: {
-    url: "https://github.com/Bingwen-Hu/DRIT"
-    owner: "Bingwen-Hu"
+    url: "https://github.com/apnkv/nla_spectral_norm"
     framework: FRAMEWORK_PYTORCH
+    description: "Spectral normalization"
 }
 methods: {
-    name: "GAN Hinge Loss"
-    full_name: "GAN Hinge Loss"
-    description: "The **GAN Hinge Loss** is a hinge loss based loss function for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks):\r\n\r\n$$ L\\_{D} = -\\mathbb{E}\\_{\\left(x, y\\right)\\sim{p}\\_{data}}\\left[\\min\\left(0, -1 + D\\left(x, y\\right)\\right)\\right] -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}\\left[\\min\\left(0, -1 - D\\left(G\\left(z\\right), y\\right)\\right)\\right] $$\r\n\r\n$$ L\\_{G} = -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}D\\left(G\\left(z\\right), y\\right) $$"
+    name: "1x1 Convolution"
+    full_name: "1x1 Convolution"
+    description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" } methods: { - name: "Spectral Normalization" - full_name: "Spectral Normalization" - description: "**Spectral Normalization** is a normalization technique used for generative adversarial networks, used to stabilize training of the discriminator. Spectral normalization has the convenient property that the Lipschitz constant is the only hyper-parameter to be tuned.\r\n\r\nIt controls the Lipschitz constant of the discriminator $f$ by constraining the spectral norm of each layer $g : \\textbf{h}\\_{in} \\rightarrow \\textbf{h}_{out}$. The Lipschitz norm $\\Vert{g}\\Vert\\_{\\text{Lip}}$ is equal to $\\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right)$, where $\\sigma\\left(a\\right)$ is the spectral norm of the matrix $A$ ($L\\_{2}$ matrix norm of $A$):\r\n\r\n$$ \\sigma\\left(a\\right) = \\max\\_{\\textbf{h}:\\textbf{h}\\neq{0}}\\frac{\\Vert{A\\textbf{h}}\\Vert\\_{2}}{\\Vert\\textbf{h}\\Vert\\_{2}} = \\max\\_{\\Vert\\textbf{h}\\Vert\\_{2}\\leq{1}}{\\Vert{A\\textbf{h}}\\Vert\\_{2}} $$\r\n\r\nwhich is equivalent to the largest singular value of $A$. Therefore for a linear layer $g\\left(\\textbf{h}\\right) = W\\textbf{h}$ the norm is given by $\\Vert{g}\\Vert\\_{\\text{Lip}} = \\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right) = \\sup\\_{\\textbf{h}}\\sigma\\left(W\\right) = \\sigma\\left(W\\right) $. 
Spectral normalization normalizes the spectral norm of the weight matrix $W$ so it satisfies the Lipschitz constraint $\\sigma\\left(W\\right) = 1$:\r\n\r\n$$ \\bar{W}\\_{\\text{SN}}\\left(W\\right) = W / \\sigma\\left(W\\right) $$" + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." } methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." } methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. 
\r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input."
+    name: "ResNet"
+    full_name: "Residual Network"
+    description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) on top of each other to form networks: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of networks are easier to optimize, and can gain accuracy from considerably increased depth."
 }
 methods: {
-    name: "Bottleneck Residual Block"
-    full_name: "Bottleneck Residual Block"
-    description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101."
+    name: "GAN Hinge Loss"
+    full_name: "GAN Hinge Loss"
+    description: "The **GAN Hinge Loss** is a hinge loss based loss function for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks):\r\n\r\n$$ L\\_{D} = -\\mathbb{E}\\_{\\left(x, y\\right)\\sim{p}\\_{data}}\\left[\\min\\left(0, -1 + D\\left(x, y\\right)\\right)\\right] -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}\\left[\\min\\left(0, -1 - D\\left(G\\left(z\\right), y\\right)\\right)\\right] $$\r\n\r\n$$ L\\_{G} = -\\mathbb{E}\\_{z\\sim{p\\_{z}}, y\\sim{p\\_{data}}}D\\left(G\\left(z\\right), y\\right) $$"
 }
 methods: {
-    name: "Residual Connection"
-    full_name: "Residual Connection"
-    description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers."
+    name: "Leaky ReLU"
+    full_name: "Leaky ReLU"
+    description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks."
 }
 methods: {
-    name: "1x1 Convolution"
-    full_name: "1x1 Convolution"
-    description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)"
+    name: "Average Pooling"
+    full_name: "Average Pooling"
+    description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)"
 }
 methods: {
-    name: "ResNet"
-    full_name: "Residual Network"
-    description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth."
+    name: "Spectral Normalization"
+    full_name: "Spectral Normalization"
+    description: "**Spectral Normalization** is a normalization technique used for generative adversarial networks, used to stabilize training of the discriminator. Spectral normalization has the convenient property that the Lipschitz constant is the only hyper-parameter to be tuned.\r\n\r\nIt controls the Lipschitz constant of the discriminator $f$ by constraining the spectral norm of each layer $g : \\textbf{h}\\_{in} \\rightarrow \\textbf{h}_{out}$. 
The Lipschitz norm $\\Vert{g}\\Vert\\_{\\text{Lip}}$ is equal to $\\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right)$, where $\\sigma\\left(a\\right)$ is the spectral norm of the matrix $A$ ($L\\_{2}$ matrix norm of $A$):\r\n\r\n$$ \\sigma\\left(a\\right) = \\max\\_{\\textbf{h}:\\textbf{h}\\neq{0}}\\frac{\\Vert{A\\textbf{h}}\\Vert\\_{2}}{\\Vert\\textbf{h}\\Vert\\_{2}} = \\max\\_{\\Vert\\textbf{h}\\Vert\\_{2}\\leq{1}}{\\Vert{A\\textbf{h}}\\Vert\\_{2}} $$\r\n\r\nwhich is equivalent to the largest singular value of $A$. Therefore for a linear layer $g\\left(\\textbf{h}\\right) = W\\textbf{h}$ the norm is given by $\\Vert{g}\\Vert\\_{\\text{Lip}} = \\sup\\_{\\textbf{h}}\\sigma\\left(\\nabla{g}\\left(\\textbf{h}\\right)\\right) = \\sup\\_{\\textbf{h}}\\sigma\\left(W\\right) = \\sigma\\left(W\\right) $. Spectral normalization normalizes the spectral norm of the weight matrix $W$ so it satisfies the Lipschitz constraint $\\sigma\\left(W\\right) = 1$:\r\n\r\n$$ \\bar{W}\\_{\\text{SN}}\\left(W\\right) = W / \\sigma\\left(W\\right) $$" } methods: { - name: "Leaky ReLU" - full_name: "Leaky ReLU" - description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" } } video: { video_id: "iXSYqohGQhM" video_title: "PR-087: Spectral Normalization for Generative Adversarial Networks" - number_of_likes: 45 - number_of_views: 4736 + number_of_likes: 48 + number_of_views: 4864 published_date: { seconds: 1526221916 } @@ -8097,13 +8149,6 @@ pr_id_to_video: { authors: "Maximilian Soelch" authors: "Justin Bayer" authors: "Patrick van der Smagt" - repositories: { - url: "https://github.com/baggepinnen/DeepFilters.jl" - owner: "baggepinnen" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Fiiiiiiiiiiiiiilters" - } repositories: { is_official: true url: "https://github.com/baggepinnen/DVBF.jl" @@ -8112,6 +8157,13 @@ pr_id_to_video: { number_of_stars: 8 description: "Deep variational Bayes filter in julia using Flux" } + repositories: { + url: "https://github.com/baggepinnen/DeepFilters.jl" + owner: "baggepinnen" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Fiiiiiiiiiiiiiilters" + } methods: { name: "Stochastic Gradient Variational Bayes" full_name: "Stochastic Gradient Variational Bayes" @@ -8121,7 +8173,7 @@ pr_id_to_video: { video_id: "uM0rQtL6_AA" video_title: "PR-088: Deep Variational Bayes Filters (2017)" number_of_likes: 38 - number_of_views: 2818 + number_of_views: 2839 published_date: { seconds: 1526901682 } @@ -8144,19 +8196,11 @@ pr_id_to_video: { authors: "W. 
James Murdoch" authors: "Peter J. Liu" authors: "Bin Yu" - repositories: { - is_official: true - url: "https://github.com/jamie-murdoch/ContextualDecomposition" - owner: "jamie-murdoch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 53 - description: "Demo for method introduced in \"Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs\"" - } repositories: { url: "https://github.com/csinva/hierarchical-dnn-interpretations" owner: "csinva" framework: FRAMEWORK_PYTORCH - number_of_stars: 92 + number_of_stars: 93 description: "Using / reproducing ACD from the paper \"Hierarchical interpretations for neural network predictions\" 🧠 (ICLR 2019)" } repositories: { @@ -8166,6 +8210,14 @@ pr_id_to_video: { number_of_stars: 1 description: "Contextual Decomposition Experiments" } + repositories: { + is_official: true + url: "https://github.com/jamie-murdoch/ContextualDecomposition" + owner: "jamie-murdoch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 53 + description: "Demo for method introduced in \"Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs\"" + } methods: { name: "Tanh Activation" full_name: "Tanh Activation" @@ -8185,7 +8237,7 @@ pr_id_to_video: { video: { video_id: "5whGIpoLoq4" video_title: "PR-089: Beyond Word Importance: Contextual Decomposition to Extract Interactions from LSTMs" - number_of_views: 442 + number_of_views: 444 published_date: { seconds: 1528641922 } @@ -8220,7 +8272,7 @@ pr_id_to_video: { video_id: "T7i_YKN2EY8" video_title: "PR-090: Representation Learning by Learning to Count" number_of_likes: 3 - number_of_views: 426 + number_of_views: 431 published_date: { seconds: 1529233262 } @@ -8236,7 +8288,7 @@ pr_id_to_video: { video_id: "v1GbxpKqH8Q" video_title: "PR-091: A Universal Music Translation Network" number_of_likes: 12 - number_of_views: 735 + number_of_views: 743 published_date: { seconds: 1529241765 } @@ -8263,8 +8315,8 @@ pr_id_to_video: { video: { video_id: "pAH3KhVnADE" video_title: "PR-092: Distributed Training of Neural Networks" - number_of_likes: 4 - number_of_views: 754 + number_of_likes: 5 + number_of_views: 758 published_date: { seconds: 1529243628 } @@ -8294,7 +8346,7 @@ pr_id_to_video: { url: "https://github.com/MaxSobolMark/HardRLWithYoutube" owner: "MaxSobolMark" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 31 + number_of_stars: 32 description: "TensorFlow implementation of \"Playing hard exploration games by watching YouTube\"" } } @@ -8316,65 +8368,72 @@ pr_id_to_video: { authors: "Pieter Abbeel" authors: "Sergey Levine" repositories: { - url: "https://github.com/ThomasGoerttler/similarity-analysis-of-maml" - owner: "ThomasGoerttler" + url: "https://github.com/clrrrr/promp_plus" + owner: "clrrrr" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Code for \"Exploring the Similarity of Representations in Model-Agnostic Meta-Learning\" Forked from the code of the original paper \"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks\"" + number_of_stars: 2 } repositories: { - url: "https://github.com/mikehuisman/revisiting-learned-optimizers" - owner: "mikehuisman" - framework: FRAMEWORK_PYTORCH + is_official: true + url: "https://github.com/cbfinn/maml" + owner: "cbfinn" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1978 + description: "Code for \"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks\"" } repositories: { - url: "https://github.com/antaradas94/MAML-waste-classification" - owner: "antaradas94" - framework: 
FRAMEWORK_OTHERS + url: "https://github.com/mari-linhares/tensorflow-maml" + owner: "mari-linhares" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 65 + description: "TensorFlow 2.0 implementation of MAML." } repositories: { - url: "https://github.com/GeorgeDUT/MetaRLSAS" - owner: "GeorgeDUT" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/CocoJam/MAML" + owner: "CocoJam" + framework: FRAMEWORK_TENSORFLOW + description: "MAML" } repositories: { - url: "https://github.com/SinghJasdeep/Projecting-Conflicting-Gradients" - owner: "SinghJasdeep" + url: "https://github.com/tristandeleu/pytorch-maml-rl" + owner: "tristandeleu" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 621 + description: "Reinforcement Learning with Model-Agnostic Meta-Learning in Pytorch" } repositories: { - url: "https://github.com/Tikquuss/meta_XLM" - owner: "Tikquuss" + url: "https://github.com/learnables/learn2learn" + owner: "learnables" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Cross-lingual Language Model (XLM) pretraining and Model-Agnostic Meta-Learning (MAML) for fast adaptation of deep networks" + number_of_stars: 1406 + description: "A PyTorch Library for Meta-learning Research" } repositories: { - url: "https://github.com/Zhiwei-Z/prompzzw" - owner: "Zhiwei-Z" + url: "https://github.com/ArnoutDevos/maml-cifar-fs" + owner: "ArnoutDevos" framework: FRAMEWORK_TENSORFLOW - description: "Experiment sequential meta training using promp" + number_of_stars: 7 + description: "Extended code for \"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks\" with CIFAR-fs classification" } repositories: { - url: "https://github.com/sidney1505/arc_maml_transformer" - owner: "sidney1505" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + url: "https://github.com/katerakelly/pytorch-maml" + owner: "katerakelly" + framework: FRAMEWORK_PYTORCH + number_of_stars: 461 + description: "PyTorch implementation of MAML: https://arxiv.org/abs/1703.03400" } repositories: { - url: "https://github.com/laiviet/maml" - owner: "laiviet" + url: "https://github.com/dragen1860/MAML-Pytorch" + owner: "dragen1860" framework: FRAMEWORK_PYTORCH - description: "Implementation of Model Agnostic Meta Learning" + number_of_stars: 1373 + description: "Elegant PyTorch implementation of paper Model-Agnostic Meta-Learning (MAML)" } repositories: { - url: "https://github.com/foolyc/Meta-SGD" - owner: "foolyc" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 65 - description: "Meta-SGD experiment on Omniglot classification compared with MAML" + url: "https://github.com/shunzh/pytorch-maml-rl" + owner: "shunzh" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } methods: { name: "TRPO" @@ -8420,8 +8479,8 @@ pr_id_to_video: { video: { video_id: "fxJXXKZb-ik" video_title: "PR-094: Model-Agnostic Meta-Learning for fast adaptation of deep networks" - number_of_likes: 60 - number_of_views: 5131 + number_of_likes: 61 + number_of_views: 5209 published_date: { seconds: 1529847830 } @@ -8499,7 +8558,7 @@ pr_id_to_video: { video_id: "dAGI3mlOmfw" video_title: "PR-095: Modularity Matters: Learning Invariant Relational Reasoning Tasks" number_of_likes: 9 - number_of_views: 773 + number_of_views: 780 published_date: { seconds: 1532272031 } @@ -8530,15 +8589,15 @@ pr_id_to_video: { url: "https://github.com/StanfordVL/taskonomy" owner: "StanfordVL" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 728 + number_of_stars: 732 description: "Taskonomy: Disentangling Task 
Transfer Learning" } } video: { video_id: "WjUGrzBIDv0" video_title: "PR-096: Taskonomy: Disentangling Task Transfer Learning" - number_of_likes: 10 - number_of_views: 1186 + number_of_likes: 11 + number_of_views: 1194 published_date: { seconds: 1530451567 } @@ -8576,7 +8635,7 @@ pr_id_to_video: { video: { video_id: "l-pcG77Hr58" video_title: "PR-097: Learning Representations for Counterfactual Inference" - number_of_views: 823 + number_of_views: 837 published_date: { seconds: 1531061236 } @@ -8602,7 +8661,7 @@ pr_id_to_video: { url: "https://github.com/zhengqili/MegaDepth" owner: "zhengqili" framework: FRAMEWORK_PYTORCH - number_of_stars: 544 + number_of_stars: 557 description: "Code of single-view depth prediction algorithm on Internet Photos described in \"MegaDepth: Learning Single-View Depth Prediction from Internet Photos, Z. Li and N. Snavely, CVPR 2018\"." } } @@ -8610,7 +8669,7 @@ pr_id_to_video: { video_id: "tGbMWAFMMBQ" video_title: "PR-098: MegaDepth: Learning Single-View Depth Prediction from Internet Photos (CVPR2018)" number_of_likes: 6 - number_of_views: 784 + number_of_views: 801 published_date: { seconds: 1531661811 } @@ -8638,7 +8697,7 @@ pr_id_to_video: { video_id: "cpCS7LBRkRU" video_title: "PR-099: MRNet-Product2Vec" number_of_likes: 23 - number_of_views: 1344 + number_of_views: 1355 published_date: { seconds: 1531661636 } @@ -8662,21 +8721,21 @@ pr_id_to_video: { authors: "Ilia Petrov" authors: "Olga Barinova" authors: "Anton Konushin" - repositories: { - url: "https://github.com/jpconnel/fbrs-segmentation" - owner: "jpconnel" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "f-brs segmentation modification for Hololens" - } repositories: { is_official: true url: "https://github.com/saic-vul/fbrs_interactive_segmentation" owner: "saic-vul" framework: FRAMEWORK_PYTORCH - number_of_stars: 405 + number_of_stars: 416 description: "[CVPR2020] f-BRS: Rethinking Backpropagating Refinement for Interactive Segmentation https://arxiv.org/abs/2001.10331" } + repositories: { + url: "https://github.com/jpconnel/fbrs-segmentation" + owner: "jpconnel" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "f-brs segmentation modification for Hololens" + } methods: { name: "Spatial Broadcast Decoder" full_name: "Spatial Broadcast Decoder" @@ -8687,7 +8746,7 @@ pr_id_to_video: { video_id: "ksTkCecBTCY" video_title: "PR100: SeedNet" number_of_likes: 11 - number_of_views: 1047 + number_of_views: 1056 published_date: { seconds: 1532265921 } @@ -8712,44 +8771,35 @@ pr_id_to_video: { authors: "Ke Sun" authors: "Guoping Qiu" repositories: { - url: "https://github.com/svenrdz/DFC-VAE" - owner: "svenrdz" + url: "https://github.com/matthew-liu/beta-vae" + owner: "matthew-liu" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Deep Feature Consistent Variational AutoEncoder (Pytorch)" + number_of_stars: 29 + description: "A Pytorch Implementation of the Beta-VAE" } repositories: { - url: "https://github.com/nmichlo/disent" - owner: "nmichlo" + url: "https://github.com/ku2482/vae.pytorch" + owner: "ku2482" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "🧶 Modular VAE Disentanglement Framework built with PyTorch Lightning. Optionally configured and run with Hydra Config." + number_of_stars: 9 + description: "A PyTorch Implementation of Deep Feature Consistent Variational Autoencoder." 
} repositories: { - url: "https://github.com/bhpfelix/Variational-Autoencoder-PyTorch" - owner: "bhpfelix" + url: "https://github.com/AntixK/PyTorch-VAE" + owner: "AntixK" framework: FRAMEWORK_PYTORCH - number_of_stars: 144 - description: "Variational Autoencoder implemented with PyTorch, Trained over CelebA Dataset" + number_of_stars: 2276 + description: "A Collection of Variational Autoencoders (VAE) in PyTorch." } repositories: { - url: "https://github.com/UdbhavPrasad072300/Generate-Fake-Faces-with-CVAE-in-PyTorch" - owner: "UdbhavPrasad072300" + url: "https://github.com/vinoth654321/Beta-Vae-face-dataset" + owner: "vinoth654321" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Making fake faces with a Convolutional Variational Autoencoder in PyTorch with celebA dataset" } repositories: { - url: "https://github.com/bogedy/intro_dfc" - owner: "bogedy" - framework: FRAMEWORK_TENSORFLOW - description: "Introspective Deep Feature Consistent Variational Autoencoder" - } - repositories: { - url: "https://github.com/peria1/VAEconvMNIST" - owner: "peria1" + url: "https://github.com/inkplatform/beta-vae" + owner: "inkplatform" framework: FRAMEWORK_PYTORCH - description: "Basic Pytorch VAE adapted to use conv2d on MNIST" } repositories: { url: "https://github.com/Nanway/dfc-vae" @@ -8758,21 +8808,29 @@ pr_id_to_video: { description: "I turned my friends into dogs and made computer generated images of them with this deep feature consistent variational autoencoder" } repositories: { - url: "https://github.com/inkplatform/beta-vae" - owner: "inkplatform" + url: "https://github.com/peria1/VAEconvMNIST" + owner: "peria1" framework: FRAMEWORK_PYTORCH + description: "Basic Pytorch VAE adapted to use conv2d on MNIST" } repositories: { - url: "https://github.com/vinoth654321/Beta-Vae-face-dataset" - owner: "vinoth654321" + url: "https://github.com/bogedy/intro_dfc" + owner: "bogedy" + framework: FRAMEWORK_TENSORFLOW + description: "Introspective Deep Feature Consistent Variational Autoencoder" + } + repositories: { + url: "https://github.com/UdbhavPrasad072300/Generate-Fake-Faces-with-CVAE-in-PyTorch" + owner: "UdbhavPrasad072300" framework: FRAMEWORK_PYTORCH + description: "Making fake faces with a Convolutional Variational Autoencoder in PyTorch with celebA dataset" } repositories: { - url: "https://github.com/matthew-liu/beta-vae" - owner: "matthew-liu" + url: "https://github.com/bhpfelix/Variational-Autoencoder-PyTorch" + owner: "bhpfelix" framework: FRAMEWORK_PYTORCH - number_of_stars: 30 - description: "A Pytorch Implementation of the Beta-VAE" + number_of_stars: 145 + description: "Variational Autoencoder implemented with PyTorch, Trained over CelebA Dataset" } methods: { name: "VAE" @@ -8789,7 +8847,7 @@ pr_id_to_video: { video_id: "FfBp6xJqZVA" video_title: "PR-101: Deep Feature Consistent Variational Autoencoder" number_of_likes: 34 - number_of_views: 9223 + number_of_views: 9312 published_date: { seconds: 1536508427 } @@ -8814,74 +8872,77 @@ pr_id_to_video: { authors: "Tinghui Zhou" authors: "Alexei A. 
Efros" repositories: { - url: "https://github.com/justinjohn0306/EverybodyDanceNow-Colab" - owner: "justinjohn0306" + is_official: true + url: "https://github.com/carolineec/EverybodyDanceNow" + owner: "carolineec" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Motion Retargeting Video Subjects, Modified Colab Version by Justin John" + number_of_stars: 429 + description: "Motion Retargeting Video Subjects" } repositories: { - url: "https://github.com/j-void/ISL_v2v" - owner: "j-void" + url: "https://github.com/Lotayou/everybody_dance_now_pytorch" + owner: "Lotayou" framework: FRAMEWORK_PYTORCH + number_of_stars: 244 + description: "A PyTorch Implementation of \"Everybody Dance Now\" from Berkeley AI lab." } repositories: { - url: "https://github.com/rajatsahay/Pose2Pose" - owner: "rajatsahay" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Research Paper Implementation" + url: "https://github.com/wjy5446/pytorch-everybody-dance-now" + owner: "wjy5446" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: ":dancer: Dance Now !!!" } repositories: { - url: "https://github.com/martin220485/everybody_dance_now_pytorch" - owner: "martin220485" + url: "https://github.com/dakenan1/Everybody-Dance-Now" + owner: "dakenan1" framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Implementation of Everybody dance now via tensorflow" } repositories: { - url: "https://github.com/CNC-IISER-BHOPAL/Any-Body-Can-Dance" - owner: "CNC-IISER-BHOPAL" + url: "https://github.com/Novemser/deep-imitation" + owner: "Novemser" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 + number_of_stars: 9 + description: "Deep learning imitation" } repositories: { - url: "https://github.com/aman-arya/Any-Body-Can-Dance" - owner: "aman-arya" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RAGHAV2998/Everybody-Can-Dance-Now-Video-game-version-" + owner: "RAGHAV2998" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "Implemented human to game character transfer based on the UCB paper with the same name. Used pose estimation api by CMU and trained a Conditional GAN model on the pose estimated stick pose stick figure to learn a video game character. Further refinement in the project can be of great use to get animated version of real-life movies. " } repositories: { - is_official: true - url: "https://github.com/carolineec/EverybodyDanceNow" - owner: "carolineec" + url: "https://github.com/VisiumCH/AMLD2020-Dirty-Gancing" + owner: "VisiumCH" framework: FRAMEWORK_PYTORCH - number_of_stars: 419 - description: "Motion Retargeting Video Subjects" + number_of_stars: 16 + description: "AMLD 2020" } repositories: { - url: "https://github.com/Lotayou/everybody_dance_now_pytorch" - owner: "Lotayou" + url: "https://github.com/ElApseR/Everybody-Dance-Now" + owner: "ElApseR" framework: FRAMEWORK_PYTORCH - number_of_stars: 240 - description: "A PyTorch Implementation of \"Everybody Dance Now\" from Berkeley AI lab." + number_of_stars: 1 } repositories: { - url: "https://github.com/wjy5446/pytorch-everybody-dance-now" - owner: "wjy5446" + url: "https://github.com/aman-arya/Any-Body-Can-Dance" + owner: "aman-arya" framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: ":dancer: Dance Now !!!" 
} repositories: { - url: "https://github.com/dakenan1/Everybody-Dance-Now" - owner: "dakenan1" + url: "https://github.com/CNC-IISER-BHOPAL/Any-Body-Can-Dance" + owner: "CNC-IISER-BHOPAL" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Implementation of Everybody dance now via tensorflow" + number_of_stars: 3 } } video: { video_id: "_onRnCb_h3Q" video_title: "PR-102: Everybody Dance Now" - number_of_views: 1375 + number_of_views: 1394 published_date: { seconds: 1536505303 } @@ -8914,7 +8975,7 @@ pr_id_to_video: { video_id: "zpJwm7f7EXs" video_title: "PR-103: Visualizing Data using t-SNE" number_of_likes: 36 - number_of_views: 2909 + number_of_views: 2957 published_date: { seconds: 1537108725 } @@ -8942,60 +9003,62 @@ pr_id_to_video: { authors: "Jan Kautz" authors: "Bryan Catanzaro" repositories: { - url: "https://github.com/play166/vid2vid" - owner: "play166" + url: "https://github.com/Sjunna9819/My-First-Project" + owner: "Sjunna9819" framework: FRAMEWORK_PYTORCH - description: "make myself for building successful" } repositories: { - url: "https://github.com/MadRabbit-jt/vid2vid" - owner: "MadRabbit-jt" + is_official: true + url: "https://github.com/NVIDIA/vid2vid" + owner: "NVIDIA" framework: FRAMEWORK_PYTORCH - description: "make myself for building successful" + number_of_stars: 7795 + description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." } repositories: { - url: "https://github.com/divyanshpuri02/divyansh.github.io" - owner: "divyanshpuri02" + url: "https://github.com/eric-erki/vid2vid" + owner: "eric-erki" framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." } repositories: { - url: "https://github.com/divyanshpuri02/Nvidia" - owner: "divyanshpuri02" + url: "https://github.com/freedombenLiu/vid2vid" + owner: "freedombenLiu" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/BUTIYO/vid2vid-test" - owner: "BUTIYO" + url: "https://github.com/yawayo/vid2vid" + owner: "yawayo" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/Sjunna9819/My-First-Project" - owner: "Sjunna9819" + url: "https://github.com/fniroui/depth2room" + owner: "fniroui" framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Real time video synthesis for robotic control development." } repositories: { - is_official: true - url: "https://github.com/NVIDIA/vid2vid" - owner: "NVIDIA" + url: "https://github.com/sakshamgupta006/video-to-video-synthesis" + owner: "sakshamgupta006" framework: FRAMEWORK_PYTORCH - number_of_stars: 7758 - description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." + number_of_stars: 4 + description: "A Pytorch implementation of Video to Video Synthesis by Nvidia" } repositories: { - url: "https://github.com/eric-erki/vid2vid" - owner: "eric-erki" + url: "https://github.com/BUTIYO/vid2vid-test" + owner: "BUTIYO" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic video-to-video translation." 
} repositories: { - url: "https://github.com/freedombenLiu/vid2vid" - owner: "freedombenLiu" + url: "https://github.com/divyanshpuri02/Nvidia" + owner: "divyanshpuri02" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/yawayo/vid2vid" - owner: "yawayo" + url: "https://github.com/divyanshpuri02/divyansh.github.io" + owner: "divyanshpuri02" framework: FRAMEWORK_PYTORCH } } @@ -9003,7 +9066,7 @@ pr_id_to_video: { video_id: "WxeeqxqnRyE" video_title: "PR-104: Video-to-Video synthesis" number_of_likes: 16 - number_of_views: 1616 + number_of_views: 1622 published_date: { seconds: 1537107746 } @@ -9030,19 +9093,12 @@ pr_id_to_video: { authors: "Mark Sandler" authors: "Andrew Howard" authors: "Quoc V. Le" - repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" - framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" - } repositories: { is_official: true url: "https://github.com/tensorflow/tpu" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 + number_of_stars: 4415 description: "Reference models and tools for Cloud TPUs." } repositories: { @@ -9063,14 +9119,14 @@ pr_id_to_video: { url: "https://github.com/mingxingtan/mnasnet" owner: "mingxingtan" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 26 + number_of_stars: 27 description: "MnasNet snapshot" } repositories: { url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet" owner: "official" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 + number_of_stars: 4415 description: "Reference models and tools for Cloud TPUs." } repositories: { @@ -9090,7 +9146,7 @@ pr_id_to_video: { url: "https://github.com/rwightman/gen-efficientnet-pytorch" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 1355 + number_of_stars: 1369 description: "Pretrained EfficientNet, EfficientNet-Lite, MixNet, MobileNetV3 / V2, MNASNet A1 and B1, FBNet, Single-Path NAS" } repositories: { @@ -9100,6 +9156,13 @@ pr_id_to_video: { number_of_stars: 2 description: "Pytorch implementation of MnasNet-A1 & MnasNet-B1" } + repositories: { + url: "https://github.com/meijieru/yet_another_mobilenet_series" + owner: "meijieru" + framework: FRAMEWORK_PYTORCH + number_of_stars: 63 + description: "Yet Another Pytorch Distributed MobileNetV2-based Networks Implementation" + } methods: { name: "Average Pooling" full_name: "Average Pooling" @@ -9155,7 +9218,7 @@ pr_id_to_video: { video_id: "4uDZxefPd-I" video_title: "PR-105: MnasNet: Platform-Aware Neural Architecture Search for Mobile" number_of_likes: 23 - number_of_views: 1980 + number_of_views: 2028 published_date: { seconds: 1538623331 } @@ -9190,14 +9253,14 @@ pr_id_to_video: { url: "https://github.com/Jianbo-Lab/L2X" owner: "Jianbo-Lab" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 90 + number_of_stars: 92 } } video: { video_id: "id_CmUaTWpg" video_title: "PR-106: Learning to Explain: An Information-Theoretic Perspective on Model Interpretation" number_of_likes: 11 - number_of_views: 1142 + number_of_views: 1150 published_date: { seconds: 1538321661 } @@ -9224,66 +9287,72 @@ pr_id_to_video: { authors: "Andrew Tao" authors: "Bryan Catanzaro" repositories: { - url: "https://github.com/WendongZh/SPL" - owner: "WendongZh" + url: "https://github.com/NVIDIA/partialconv" + owner: "NVIDIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "Code for Context-Aware Image Inpainting with Learned Semantic Priors, IJCAI 2021" + number_of_stars: 954 + 
description: "A New Padding Scheme: Partial Convolution based Padding" } repositories: { - url: "https://github.com/feixuetuba/inpating" - owner: "feixuetuba" + url: "https://github.com/youyuge34/AnimeInPaint" + owner: "youyuge34" framework: FRAMEWORK_PYTORCH - description: "复现Image Inpainting for Irregular Holes Using Partial Convolutions" + number_of_stars: 812 + description: "An application tool of edge-connect, which can do anime inpainting and drawing. 动漫人物图片自动修复,去马赛克,填补,去瑕疵" } repositories: { - url: "https://github.com/jshi31/edge-connect" - owner: "jshi31" + url: "https://github.com/naoto0804/pytorch-inpainting-with-partial-conv" + owner: "naoto0804" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 465 + description: "Unofficial pytorch implementation of 'Image Inpainting for Irregular Holes Using Partial Convolutions' [Liu+, ECCV2018]" } repositories: { - url: "https://github.com/ayulockin/deepimageinpainting" - owner: "ayulockin" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 - description: "Deep Image Inpainting using UNET like Vanilla Autoencoder and Partial Convolution based Autoencoder. " + url: "https://github.com/TrinhQuocNguyen/Edited_PConv-Keras" + owner: "TrinhQuocNguyen" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Code borrowed heavily from: https://github.com/MathiasGruber/PConv-Keras" } repositories: { - url: "https://github.com/KPMG-wiseuniv/AI" - owner: "KPMG-wiseuniv" + url: "https://github.com/bobqywei/inpainting-partial-conv" + owner: "bobqywei" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "for AI" + number_of_stars: 65 + description: "PyTorch implementation of \"Image Inpainting for Irregular Holes Using Partial Convolutions\"" } repositories: { - url: "https://github.com/hiyaroy12/DFT_inpainting" - owner: "hiyaroy12" + url: "https://github.com/KumapowerLIU/Rethinking-Inpainting-MEDFE" + owner: "KumapowerLIU" framework: FRAMEWORK_PYTORCH - number_of_stars: 12 - description: "Image inpainting using frequency domain priors" + number_of_stars: 269 + description: "Rethinking Image Inpainting via a Mutual Encoder Decoder with Feature Equalizations. ECCV 2020 Oral" } repositories: { - url: "https://github.com/yashk2000/SneakySketchers" - owner: "yashk2000" + url: "https://github.com/SunnerLi/P-Conv" + owner: "SunnerLi" framework: FRAMEWORK_PYTORCH - number_of_stars: 14 - description: "A python desktop application that allows you to do image inpainting by directly drawing on it. " + number_of_stars: 12 + description: "The implementation of the partial convolution" } repositories: { - url: "https://github.com/preeti-2810/object-removal" - owner: "preeti-2810" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/SimonDele/School-projects" + owner: "SimonDele" + framework: FRAMEWORK_OTHERS + description: " List of some interesting projects completed while being at INSA Rouen Normandie in Applied Mathematics department." 
} repositories: { - url: "https://github.com/Maouriyan/inpainting_demo" - owner: "Maouriyan" + url: "https://github.com/gmin7/pconv_implementation" + owner: "gmin7" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Minimal implementation of NVIDIA's Image Inpainting algorithm in PyTorch" } repositories: { - url: "https://github.com/chefpr7/Image-Inpainting-using-Partial-Convolutional-Layers" - owner: "chefpr7" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/YeshengSu/EdgeConnect" + owner: "YeshengSu" + framework: FRAMEWORK_PYTORCH + description: "Generative Image Inpainting" } methods: { name: "Convolution" @@ -9295,7 +9364,7 @@ pr_id_to_video: { video_id: "BhZN6AqfylA" video_title: "PR-107: Image Inpainting for Irregular Holes Using Partial Convolutions" number_of_likes: 27 - number_of_views: 2744 + number_of_views: 2766 published_date: { seconds: 1539060135 } @@ -9320,73 +9389,73 @@ pr_id_to_video: { authors: "Menglong Zhu" authors: "Andrey Zhmoginov" authors: "Liang-Chieh Chen" - repositories: { - url: "https://github.com/Gideon0805/Tensorflow1.15-Model-Pruning" - owner: "Gideon0805" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Pruning for TF1.5" - } repositories: { url: "https://github.com/tensorflow/models/tree/master/research/deeplab" owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + url: "https://github.com/pytorch/vision" + owner: "pytorch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9607 + description: "Datasets, Transforms and Models specific to Computer Vision" } repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." + url: "https://github.com/heidongxianhau/deeplab2" + owner: "heidongxianhau" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 } repositories: { - url: "https://github.com/espressif/esp-who" - owner: "espressif" - framework: FRAMEWORK_OTHERS - number_of_stars: 1068 - description: "Face detection and recognition framework" + url: "https://github.com/TTMRonald/MobileNet_Zoo" + owner: "TTMRonald" + framework: FRAMEWORK_TENSORFLOW + description: "A Keras implementation of MobileNet_V1 and MobileNet_V2." } repositories: { - url: "https://github.com/Gideon0805/Tensorflow_Model_Pruning" - owner: "Gideon0805" + url: "https://github.com/Robinatp/Deeplab_Tensorflow" + owner: "Robinatp" framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 - description: "Pruning for TF1.5" + description: "DeepLab: Deep Labelling for Semantic Image Segmentation" } repositories: { - url: "https://github.com/akrapukhin/MobileNetV3" - owner: "akrapukhin" - framework: FRAMEWORK_PYTORCH - description: "An implementation of the MobileNetV3 models in Pytorch with scripts for training, testing and measuring latency." 
+ url: "https://github.com/motlabs/dont-be-turtle" + owner: "motlabs" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 29 + description: "A mobile ML project to detect turtle neck posture while your working" } repositories: { - url: "https://github.com/pytorch/vision" - owner: "pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9433 - description: "Datasets, Transforms and Models specific to Computer Vision" + url: "https://github.com/Vignesh-95/cnn-semantic-segmentation-satellite-images" + owner: "Vignesh-95" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "The analysis of satellite imagery can provide vital insights in various corporate and governmental application areas. Semantic segmentation of satellite imagery provides dense pixel-wise knowledge, which is necessary to facilitate such analysis. Deep convolutional neural networks are state of the art, when it comes to semantic segmentation of digital images. However, characteristics unique to satellite imagery like their high-resolution, multi-channel input and limited availability of data-sets challenge the designing, parametrizing and training of the neural network model." } repositories: { - url: "https://github.com/stevensmiley1989/MrRobot" - owner: "stevensmiley1989" + url: "https://github.com/kekeller/semantic_soy_deeplabv3plus" + owner: "kekeller" framework: FRAMEWORK_TENSORFLOW - description: "This is a robot I designed in Fusion 360 and 3D printed with my FlashForge Creator Pro in PLA, Main Hardware: 1 x Raspberry Pi 3b, 3 x Arduinos with I2C, 5 x ultrasonic sensors, 4 x 60Kg Servos, 4 x 12V 200rpm DC motors, 1 x stepper motor for loading ammo into custom built coil gun. The coil gun uses 2 x 450V 1000uF Capacitors in parallel with a boost converter, yielding 380V maximum charge discharge from a 12V input, firing with a 1.2kV maximum peak non-repetitive surge current 1.1kA rated Thyristor SCR, Main Software: Uses TensorFlow and Python for Object Detection with some C++ for motor controls. The model used is a retrained Single Shot Detection MobileNet V2 algorithm trained on a toy reindeer. Signal processing allows proportional controller feedback to adjust movement of the robot for moving, aiming, and shooting. An application for IOS was written in Swift to control the robot as well, using Mosquito MQTT Broker for communication. " + number_of_stars: 6 + description: "Use the tensorflow deeplab version 3+ to semantically segment images of soybean leaves. 
" } repositories: { - url: "https://github.com/d-li14/mobilenetv2.pytorch" - owner: "d-li14" + url: "https://github.com/xxradon/IGCV3-pytorch" + owner: "xxradon" framework: FRAMEWORK_PYTORCH - number_of_stars: 403 - description: "72.8% MobileNetV2 1.0 model on ImageNet and a spectrum of pre-trained MobileNetV2 models" + number_of_stars: 17 + description: "IGCV3 reimplement by pytorch" + } + repositories: { + url: "https://github.com/zym1119/MobileNetV2_pytorch_cifar" + owner: "zym1119" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "A complete implementation of MobileNetv2 on CIFAR dataset in PyTorch" } methods: { name: "DeepLabv3" @@ -9442,8 +9511,8 @@ pr_id_to_video: { video: { video_id: "mT5Y-Zumbbw" video_title: "PR-108: MobileNetV2: Inverted Residuals and Linear Bottlenecks" - number_of_likes: 70 - number_of_views: 8335 + number_of_likes: 71 + number_of_views: 8532 published_date: { seconds: 1540388729 } @@ -9467,70 +9536,72 @@ pr_id_to_video: { authors: "Jeff Donahue" authors: "Karen Simonyan" repositories: { - url: "https://github.com/roberttwomey/machine-imagination-workshop" - owner: "roberttwomey" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" + url: "https://github.com/ZVK/Talking-Heads" + owner: "ZVK" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 } repositories: { - url: "https://github.com/notperquisites/bigsleep" - owner: "notperquisites" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Personal Big Sleep AI Repo" + url: "https://github.com/taki0112/BigGAN-Tensorflow" + owner: "taki0112" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 245 + description: "Simple Tensorflow implementation of \"Large Scale GAN Training for High Fidelity Natural Image Synthesis\" (BigGAN)" } repositories: { - url: "https://github.com/lucidrains/big-sleep" - owner: "lucidrains" + url: "https://github.com/ajbrock/BigGAN-PyTorch" + owner: "ajbrock" framework: FRAMEWORK_PYTORCH - number_of_stars: 1222 - description: "A simple command line tool for text to image generation, using OpenAI's CLIP and a BigGAN. Technique was originally created by https://twitter.com/advadnoun" + number_of_stars: 2368 + description: "The author's officially unofficial PyTorch BigGAN implementation." } repositories: { - url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter08" - owner: "master" + url: "https://github.com/ANIME305/Anime-GAN-tensorflow" + owner: "ANIME305" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 65 - description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" + number_of_stars: 182 + description: "The BIGGAN based Anime generation implemented with tensorflow. All training data has been open sourced." } repositories: { - url: "https://github.com/yaxingwang/DeepI2I" - owner: "yaxingwang" - framework: FRAMEWORK_PYTORCH - number_of_stars: 20 - description: "Image-to-image translation, knowledge transfer" + url: "https://github.com/ANIME305/Anime-GAN" + owner: "ANIME305" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 182 + description: "The BIGGAN based Anime generation implemented with tensorflow. All training data has been open sourced." } repositories: { - url: "https://github.com/uoguelph-mlrg/instance_selection_for_gans" - owner: "uoguelph-mlrg" - framework: FRAMEWORK_PYTORCH - number_of_stars: 29 - description: "Official code repository for Instance Selection for GANs." 
+ url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter08" + owner: "master" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 75 + description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" } repositories: { - url: "https://github.com/minyoungg/pix2latent" - owner: "minyoungg" + url: "https://github.com/ivclab/BigGAN-Generator-Pretrained-Pytorch" + owner: "ivclab" framework: FRAMEWORK_PYTORCH - number_of_stars: 153 - description: "Code for: Transforming and Projecting Images into Class-conditional Generative Networks" + number_of_stars: 32 + description: "Pytorch implementation of BigGAN Generator with pretrained weights" } repositories: { - url: "https://github.com/times2049/talkinghead" - owner: "times2049" + url: "https://github.com/vincent-thevenin/Realistic-Neural-Talking-Head-Models" + owner: "vincent-thevenin" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 661 + description: "My implementation of Few-Shot Adversarial Learning of Realistic Neural Talking Head Models (Egor Zakharov et al.)." } repositories: { - url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" - owner: "krisrjohnson" + url: "https://github.com/ZVK/talking_heads" + owner: "ZVK" framework: FRAMEWORK_PYTORCH + number_of_stars: 5 } repositories: { - url: "https://github.com/amanjaiswal73892/changemypet" - owner: "amanjaiswal73892" + url: "https://github.com/notperquisites/bigsleep" + owner: "notperquisites" framework: FRAMEWORK_PYTORCH - description: "Deep Learning Project" + number_of_stars: 1 + description: "Personal Big Sleep AI Repo" } methods: { name: "BigGAN" @@ -9586,8 +9657,8 @@ pr_id_to_video: { video: { video_id: "1f0faOeqDQ0" video_title: "PR-109: Large Scale GAN Training for High Fidelity Natural Image Synthesis" - number_of_likes: 12 - number_of_views: 1221 + number_of_likes: 13 + number_of_views: 1244 published_date: { seconds: 1539797131 } @@ -9664,7 +9735,7 @@ pr_id_to_video: { video_id: "nimHWHxjBJ8" video_title: "PR-110: An Analysis of Scale Invariance in Object Detection – SNIP" number_of_likes: 14 - number_of_views: 1230 + number_of_views: 1238 published_date: { seconds: 1540590955 } @@ -9693,7 +9764,7 @@ pr_id_to_video: { video_id: "uwRz7PjVtB0" video_title: "PR-111: EVA2:Exploiting Temporal Redundancy in Live Computer Vision" number_of_likes: 23 - number_of_views: 800 + number_of_views: 804 published_date: { seconds: 1540137553 } @@ -9715,10 +9786,11 @@ pr_id_to_video: { } authors: "Jonathon Shlens" repositories: { - url: "https://github.com/VU-BEAM-Lab/ADMIRE" - owner: "VU-BEAM-Lab" + url: "https://github.com/petteriTeikari/mixedImageSeparation" + owner: "petteriTeikari" framework: FRAMEWORK_OTHERS - number_of_stars: 1 + number_of_stars: 3 + description: "with FastICA (and icasso for robustness)" } repositories: { url: "https://github.com/bhaskar-agrawal/Independent-component-analysis" @@ -9726,11 +9798,10 @@ pr_id_to_video: { framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/petteriTeikari/mixedImageSeparation" - owner: "petteriTeikari" + url: "https://github.com/VU-BEAM-Lab/ADMIRE" + owner: "VU-BEAM-Lab" framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "with FastICA (and icasso for robustness)" + number_of_stars: 1 } methods: { name: "ICA" @@ -9758,7 +9829,7 @@ pr_id_to_video: { url: "https://github.com/roimehrez/PIRM2018" owner: "roimehrez" framework: FRAMEWORK_OTHERS - number_of_stars: 190 + number_of_stars: 192 description: 
"Workshop and Challenge on Perceptual Image Restoration and Manipulation" } } @@ -9766,7 +9837,7 @@ pr_id_to_video: { video_id: "6Yid4dituqo" video_title: "PR-113: The Perception Distortion Tradeoff" number_of_likes: 16 - number_of_views: 1376 + number_of_views: 1397 published_date: { seconds: 1540734798 } @@ -9794,14 +9865,14 @@ pr_id_to_video: { url: "https://github.com/aayushbansal/Recycle-GAN" owner: "aayushbansal" framework: FRAMEWORK_PYTORCH - number_of_stars: 378 + number_of_stars: 380 description: "Unsupervised Video Retargeting (e.g. face to face, flower to flower, clouds and winds, sunrise and sunset)" } } video: { video_id: "eMZXUqmp_PU" video_title: "PR-114: Recycle-GAN, Unsupervised Video Retargeting" - number_of_views: 1179 + number_of_views: 1188 published_date: { seconds: 1540738223 } @@ -9826,37 +9897,6 @@ pr_id_to_video: { authors: "Sebastian M. Waldstein" authors: "Ursula Schmidt-Erfurth" authors: "Georg Langs" - repositories: { - url: "https://github.com/YeongHyeon/f-AnoGAN-TF" - owner: "YeongHyeon" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "TensorFlow implementation of f-AnoGAN (with MNIST dataset)" - } - repositories: { - url: "https://github.com/xtarx/Unsupervised-Anomaly-Detection-with-Generative-Adversarial-Networks" - owner: "xtarx" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 93 - description: "Unsupervised Anomaly Detection with Generative Adversarial Networks on MIAS dataset" - } - repositories: { - url: "https://github.com/NMADALI97/Learning-With-Wasserstein-Loss" - owner: "NMADALI97" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/seungjunlee96/AnoGAN-pytorch" - owner: "seungjunlee96" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "Pytorch implementation of \"Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery\"" - } - repositories: { - url: "https://github.com/mullue/anogan-mnist" - owner: "mullue" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/fuchami/ANOGAN" owner: "fuchami" @@ -9874,7 +9914,7 @@ pr_id_to_video: { url: "https://github.com/LeeDoYup/AnoGAN" owner: "LeeDoYup" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 229 + number_of_stars: 231 description: "Unofficial Tensorflow Implementation of AnoGAN (Anomaly GAN)" } repositories: { @@ -9890,12 +9930,42 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW number_of_stars: 3 } + repositories: { + url: "https://github.com/crystal02146/AnoGAN-Keras" + owner: "crystal02146" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + } + repositories: { + url: "https://github.com/Xiaohui9607/f_anogan_pytorch" + owner: "Xiaohui9607" + framework: FRAMEWORK_PYTORCH + number_of_stars: 19 + description: "Code for reproducing f-AnoGAN in Pytorch" + } + repositories: { + url: "https://github.com/mullue/anogan-mnist" + owner: "mullue" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/seungjunlee96/AnoGAN-pytorch" + owner: "seungjunlee96" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "Pytorch implementation of \"Unsupervised Anomaly Detection with Generative Adversarial Networks to Guide Marker Discovery\"" + } + repositories: { + url: "https://github.com/NMADALI97/Learning-With-Wasserstein-Loss" + owner: "NMADALI97" + framework: FRAMEWORK_TENSORFLOW + } } video: { video_id: "R0H0gqtnMyA" video_title: "PR-115: Unsupervised Anomaly Detection with Generative Adversarial Networks" - number_of_likes: 
35 - number_of_views: 3334 + number_of_likes: 34 + number_of_views: 3415 published_date: { seconds: 1541343064 } @@ -9918,66 +9988,75 @@ pr_id_to_video: { authors: "Diederik P. Kingma" authors: "Prafulla Dhariwal" repositories: { - url: "https://github.com/Naagar/Glow_NormalizingFlow_implimentation" - owner: "Naagar" - framework: FRAMEWORK_PYTORCH - description: "pyTorch implimentation of the Glow paper and Reimplementations of density estimation algorithms" + url: "https://github.com/musyoku/chainer-glow" + owner: "musyoku" + framework: FRAMEWORK_OTHERS + number_of_stars: 75 + description: "Glow: Generative Flow with Invertible 1×1 Convolutions" } repositories: { - url: "https://github.com/Zhangyanbo/iResNetLab" - owner: "Zhangyanbo" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "A python/pytorch package for invertible neural networks" + url: "https://github.com/musyoku/generative-flow" + owner: "musyoku" + framework: FRAMEWORK_OTHERS + number_of_stars: 75 + description: "Glow: Generative Flow with Invertible 1×1 Convolutions" } repositories: { - url: "https://github.com/Daniel-H-99/CRD" - owner: "Daniel-H-99" + url: "https://github.com/ex4sperans/variational-inference-with-normalizing-flows" + owner: "ex4sperans" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 159 + description: "Reimplementation of Variational Inference with Normalizing Flows (https://arxiv.org/abs/1505.05770)" } repositories: { - url: "https://github.com/simonwestberg/Glow" - owner: "simonwestberg" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "A replication of \"Glow: Generative Flow with Invertible 1×1 Convolutions\" and an investigation of its performance on Out-of-Distribution detection " + url: "https://github.com/rosinality/glow-pytorch" + owner: "rosinality" + framework: FRAMEWORK_PYTORCH + number_of_stars: 316 + description: "PyTorch implementation of Glow" } repositories: { - url: "https://github.com/simonwestberg/DD2412-Glow" - owner: "simonwestberg" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "A replication of \"Glow: Generative Flow with Invertible 1×1 Convolutions\" and an investigation of its performance on Out-of-Distribution detection " + url: "https://github.com/5yearsKim/Conditional-Normalizing-Flow" + owner: "5yearsKim" + framework: FRAMEWORK_PYTORCH + number_of_stars: 22 + description: "Conditional Generative model (Normalizing Flow) and experimenting style transfer using this model" } repositories: { - url: "https://github.com/samuelmat19/GLOW-tf2" - owner: "samuelmat19" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Glow: Generative Flow with Invertible 1x1 Convolutions in Tensorflow 2" + url: "https://github.com/chrischute/glow" + owner: "chrischute" + framework: FRAMEWORK_PYTORCH + number_of_stars: 62 + description: "Implementation of Glow in PyTorch" } repositories: { - url: "https://github.com/L0SG/NanoFlow" - owner: "L0SG" + url: "https://github.com/ikostrikov/pytorch-flows" + owner: "ikostrikov" framework: FRAMEWORK_PYTORCH - number_of_stars: 60 - description: "PyTorch implementation of the paper \"NanoFlow: Scalable Normalizing Flows with Sublinear Parameter Complexity.\"" + number_of_stars: 475 + description: "PyTorch implementations of algorithms for density estimation" } repositories: { - url: "https://github.com/eyalbetzalel/GLOW2" - owner: "eyalbetzalel" - framework: FRAMEWORK_OTHERS + url: "https://github.com/y0ast/Glow-PyTorch" + owner: "y0ast" + framework: 
FRAMEWORK_PYTORCH + number_of_stars: 270 + description: "Simple, extendable, easy to understand Glow implementation in PyTorch" } repositories: { - url: "https://github.com/ClaraBing/flow" - owner: "ClaraBing" + url: "https://github.com/eyalbetzalel/GLOW" + owner: "eyalbetzalel" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "glow" } repositories: { - url: "https://github.com/rhychen/Glow" - owner: "rhychen" - framework: FRAMEWORK_PYTORCH + is_official: true + url: "https://github.com/openai/glow" + owner: "openai" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2798 + description: "Code for reproducing results in \"Glow: Generative Flow with Invertible 1x1 Convolutions\"" } methods: { name: "GLOW" @@ -10029,7 +10108,7 @@ pr_id_to_video: { video_id: "6OVH1i2BVAE" video_title: "PR-116: Glow: Generative Flow with Invertible 1x1 Convolutions" number_of_likes: 21 - number_of_views: 2817 + number_of_views: 2856 published_date: { seconds: 1541342135 } @@ -10066,7 +10145,7 @@ pr_id_to_video: { video_id: "VQsG_Yk9KuQ" video_title: "PR-117: PeerNets: Exploiting Peer Wisdom Against Adversarial Attacks" number_of_likes: 5 - number_of_views: 769 + number_of_views: 774 published_date: { seconds: 1542016335 } @@ -10095,7 +10174,7 @@ pr_id_to_video: { url: "https://github.com/labsix/limited-blackbox-attacks" owner: "labsix" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 141 + number_of_stars: 145 description: "Code for \"Black-box Adversarial Attacks with Limited Queries and Information\" (http://arxiv.org/abs/1804.08598)" } repositories: { @@ -10109,7 +10188,7 @@ pr_id_to_video: { video_id: "AMPpOFtg3Q4" video_title: "PR-118: Black-Box Attacks with Limited Queries and Information" number_of_likes: 2 - number_of_views: 420 + number_of_views: 421 published_date: { seconds: 1541943972 } @@ -10135,7 +10214,7 @@ pr_id_to_video: { url: "https://github.com/razvancaramalau/Sequential-GCN-for-Active-Learning" owner: "razvancaramalau" framework: FRAMEWORK_PYTORCH - number_of_stars: 24 + number_of_stars: 28 } repositories: { url: "https://github.com/rpinsler/active-bayesian-coresets" @@ -10152,7 +10231,7 @@ pr_id_to_video: { video_id: "3ROQis3hxPs" video_title: "PR-119: Active Learning For Convolutional Neural Networks: A Core-Set Approach" number_of_likes: 23 - number_of_views: 1922 + number_of_views: 1959 published_date: { seconds: 1543402308 } @@ -10177,72 +10256,69 @@ pr_id_to_video: { authors: "Hai-Tao Zheng" authors: "Jian Sun" repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" + url: "https://github.com/ba-san/MobilePose-Pi" + owner: "ba-san" framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." + number_of_stars: 15 + description: "MobilePose deployment for Raspberry Pi" } repositories: { - url: "https://github.com/pytorch/vision" - owner: "pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9433 - description: "Datasets, Transforms and Models specific to Computer Vision" + url: "https://github.com/forcefulowl/image_classification" + owner: "forcefulowl" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/PaddlePaddle/PaddleSeg" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 1763 - description: "End-to-end image segmentation kit based on PaddlePaddle. 
" + url: "https://github.com/allenai/dnw" + owner: "allenai" + framework: FRAMEWORK_PYTORCH + number_of_stars: 133 + description: "Discovering Neural Wirings (https://arxiv.org/abs/1906.00586)" } repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + url: "https://github.com/mnicnc404/CartoonGan-tensorflow" + owner: "mnicnc404" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 665 + description: "Generate your own cartoon-style images with CartoonGAN (CVPR 2018), powered by TensorFlow 2.0 Alpha." } repositories: { - url: "https://github.com/allen108108/Model-Optimizer_Implementation" - owner: "allen108108" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Training different model on MNIST datadet to see their performance" + url: "https://github.com/xggIoU/centernet_tensorflow_wilderface_voc" + owner: "xggIoU" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 103 + description: "This is the unofficial implementation of the \"CenterNet:Objects as Points\".Just a simple try with self-modified shufflenetv2 and yolov3.If you want better results, you need more experiments." } repositories: { - url: "https://github.com/zjZSTU/LightWeightCNN" - owner: "zjZSTU" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "轻量化卷积神经网络实现(SqueezeNet/MobileNet/ShuffleNet/MnasNet)" + url: "https://github.com/Qengineering/ShuffleNetV2-ncnn" + owner: "Qengineering" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "ShuffleNet_V2 for ncnn framework" } repositories: { - url: "https://github.com/ba-san/MobilePose-Pi" - owner: "ba-san" + url: "https://github.com/savageyusuff/MobilePose-Pi" + owner: "savageyusuff" framework: FRAMEWORK_PYTORCH number_of_stars: 15 description: "MobilePose deployment for Raspberry Pi" } repositories: { - url: "https://github.com/forcefulowl/image_classification" - owner: "forcefulowl" + url: "https://github.com/PaulGitt/ShuffleNetV2-tensorflow" + owner: "PaulGitt" framework: FRAMEWORK_TENSORFLOW + description: "ShuffleNetV2 in tensorflow (A simple way) " } repositories: { - url: "https://github.com/allenai/dnw" - owner: "allenai" - framework: FRAMEWORK_PYTORCH - number_of_stars: 132 - description: "Discovering Neural Wirings (https://arxiv.org/abs/1906.00586)" + url: "https://github.com/sokeeffe/caffe_yolo" + owner: "sokeeffe" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/mnicnc404/CartoonGan-tensorflow" - owner: "mnicnc404" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 654 - description: "Generate your own cartoon-style images with CartoonGAN (CVPR 2018), powered by TensorFlow 2.0 Alpha." 
+ url: "https://github.com/osmr/imgclsmob" + owner: "osmr" + framework: FRAMEWORK_OTHERS + number_of_stars: 2268 + description: "Sandbox for training deep learning networks" } methods: { name: "Average Pooling" @@ -10298,8 +10374,8 @@ pr_id_to_video: { video: { video_id: "lrU6uXiJ_9Y" video_title: "PR-120: ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - number_of_likes: 35 - number_of_views: 2914 + number_of_likes: 36 + number_of_views: 2957 published_date: { seconds: 1542552935 } @@ -10324,66 +10400,67 @@ pr_id_to_video: { authors: "Kenton Lee" authors: "Kristina Toutanova" repositories: { - url: "https://github.com/dnanhkhoa/pytorch-pretrained-BERT" - owner: "dnanhkhoa" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A PyTorch implementation of Google AI's BERT model provided with Google's pre-trained models, examples and utilities." + url: "https://github.com/chrisseiler96/bert-client-server-tests" + owner: "chrisseiler96" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/itspreeti25/Q-A-implementation-using-BERT-finetuned-on-multiple-datasets" - owner: "itspreeti25" - framework: FRAMEWORK_OTHERS - description: "In Progress." + url: "https://github.com/kingcheng2000/bert" + owner: "kingcheng2000" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/skoltech-nlp/rudetoxifier" - owner: "skoltech-nlp" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "Code and data of \"Methods for Detoxification of Texts for the Russian Language\" paper" + url: "https://github.com/frankcgq105/BERTCHEN" + owner: "frankcgq105" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 } repositories: { - url: "https://github.com/avishek-018/single_notebook_projects" - owner: "avishek-018" + url: "https://github.com/brightmart/bert_customized" + owner: "brightmart" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 + number_of_stars: 25 + description: "bert with customized features" } repositories: { - url: "https://github.com/airsplay/vimpac" - owner: "airsplay" + url: "https://github.com/GauthierDmn/question_answering" + owner: "GauthierDmn" framework: FRAMEWORK_PYTORCH - number_of_stars: 36 + number_of_stars: 13 + description: "Question Answering task using Deep Learning on SQuAD dataset" } repositories: { - url: "https://github.com/han-shi/SparseBERT" - owner: "han-shi" + url: "https://github.com/jsantoso2/yelp-clone-ml-project" + owner: "jsantoso2" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 1 + description: "Yelp Clone app in React with Flask API endpoints, combined with BERT model in Pytorch with 66% accuracy." } repositories: { - url: "https://github.com/NoraH2004/adv-absa" - owner: "NoraH2004" + url: "https://github.com/weidafeng/NLU2019" + owner: "weidafeng" framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "NLU2019 project: Question NLI. The task is to determine whether the context sentence contains the answer to the question (entailment or not entailment)." 
} repositories: { - url: "https://github.com/SindhuMadi/FakeNewsDetection" - owner: "SindhuMadi" - framework: FRAMEWORK_OTHERS - description: "BERT and RoBERTa" + url: "https://github.com/JohannLee1996/bert" + owner: "JohannLee1996" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/mlm" - owner: "transformers" + url: "https://github.com/chambliss/Multilingual_NER" + owner: "chambliss" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + number_of_stars: 101 + description: "Applying BERT to named entity recognition in English and Russian." } repositories: { - url: "https://github.com/JA-Bar/nlp-depression" - owner: "JA-Bar" + url: "https://github.com/benywon/ChineseBert" + owner: "benywon" framework: FRAMEWORK_PYTORCH - description: "NLP course project. Tool to potentially identify signs of depression from text and audio." + number_of_stars: 23 + description: "This is a chinese Bert model specific for question answering" } methods: { name: "Scaled Dot-Product Attention" @@ -10440,7 +10517,7 @@ pr_id_to_video: { video_id: "GK4IO3qOnLc" video_title: "PR-121: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" number_of_likes: 41 - number_of_views: 2843 + number_of_views: 2874 published_date: { seconds: 1543981172 } @@ -10465,38 +10542,24 @@ pr_id_to_video: { authors: "Mohamed Elhoseiny" authors: "Marian Mazzone" repositories: { - url: "https://github.com/otepencelik/GAN-Artwork-Generation" - owner: "otepencelik" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + url: "https://github.com/AndreasWieg/Creative-GAN" + owner: "AndreasWieg" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: " Art-GAN" } repositories: { url: "https://github.com/naotokui/CreativeGAN-Rhythm" owner: "naotokui" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 19 + number_of_stars: 20 description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" } repositories: { - url: "https://github.com/dylanell/conditional-wgan" - owner: "dylanell" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Implementation of Conditional Wasserstein Generative Adversarial Network (GAN) in PyTorch" - } - repositories: { - url: "https://github.com/mlberkeley/Creative-Adversarial-Networks" - owner: "mlberkeley" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 200 - description: "(WIP) Implementation of Creative Adversarial Networks https://arxiv.org/pdf/1706.07068.pdf" - } - repositories: { - url: "https://github.com/sfc-computational-creativity-lab/x-rhythm-can" - owner: "sfc-computational-creativity-lab" + url: "https://github.com/zawlinnnaing/CAN-thesis" + owner: "zawlinnnaing" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" + number_of_stars: 2 } repositories: { url: "https://github.com/VladAleshin/pytorch" @@ -10505,19 +10568,6 @@ pr_id_to_video: { number_of_stars: 1 description: "GAN (pet project on pytorch and flask)" } - repositories: { - url: "https://github.com/AndreasWieg/Creative-GAN" - owner: "AndreasWieg" - framework: FRAMEWORK_TENSORFLOW - 
number_of_stars: 3 - description: " Art-GAN" - } - repositories: { - url: "https://github.com/zawlinnnaing/CAN-thesis" - owner: "zawlinnnaing" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } repositories: { url: "https://github.com/casey-barr/generative-models-in-tensorflow" owner: "casey-barr" @@ -10532,12 +10582,39 @@ pr_id_to_video: { number_of_stars: 17 description: "UAL, CCI - MSc course: 19/20 \"IU000128 Coding Three: Exploring Machine Intelligence\"" } + repositories: { + url: "https://github.com/sfc-computational-creativity-lab/x-rhythm-can" + owner: "sfc-computational-creativity-lab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Creative Adversarial Network for generating Dance Music Rhythm Patterns" + } + repositories: { + url: "https://github.com/mlberkeley/Creative-Adversarial-Networks" + owner: "mlberkeley" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 200 + description: "(WIP) Implementation of Creative Adversarial Networks https://arxiv.org/pdf/1706.07068.pdf" + } + repositories: { + url: "https://github.com/dylanell/conditional-wgan" + owner: "dylanell" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Implementation of Conditional Wasserstein Generative Adversarial Network (GAN) in PyTorch" + } + repositories: { + url: "https://github.com/otepencelik/GAN-Artwork-Generation" + owner: "otepencelik" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + } } video: { video_id: "TB7izZIWYyw" video_title: "PR-122: CAN: Creative Adversarial Networks" number_of_likes: 13 - number_of_views: 1142 + number_of_views: 1160 published_date: { seconds: 1543554496 } @@ -10565,18 +10642,12 @@ pr_id_to_video: { authors: "Zhiding Yu" authors: "Andrew Tao" authors: "Bryan Catanzaro" - repositories: { - url: "https://github.com/feixuetuba/inpating" - owner: "feixuetuba" - framework: FRAMEWORK_PYTORCH - description: "复现Image Inpainting for Irregular Holes Using Partial Convolutions" - } repositories: { is_official: true url: "https://github.com/NVIDIA/partialconv" owner: "NVIDIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 942 + number_of_stars: 954 description: "A New Padding Scheme: Partial Convolution based Padding" } repositories: { @@ -10586,6 +10657,12 @@ pr_id_to_video: { number_of_stars: 4 description: "auto adaptive framework for intrinsic hyperparameter selection, adaptive padding, normalized weights" } + repositories: { + url: "https://github.com/feixuetuba/inpating" + owner: "feixuetuba" + framework: FRAMEWORK_PYTORCH + description: "复现Image Inpainting for Irregular Holes Using Partial Convolutions" + } methods: { name: "Convolution" full_name: "Convolution" @@ -10595,8 +10672,8 @@ pr_id_to_video: { video: { video_id: "IKHzc7sGCxQ" video_title: "PR-123: Partial Convolution based Padding" - number_of_likes: 50 - number_of_views: 2276 + number_of_likes: 51 + number_of_views: 2288 published_date: { seconds: 1544173387 } @@ -10652,7 +10729,7 @@ pr_id_to_video: { url: "https://github.com/eriklindernoren/PyTorch-GAN" owner: "eriklindernoren" framework: FRAMEWORK_PYTORCH - number_of_stars: 9744 + number_of_stars: 9972 description: "PyTorch implementations of Generative Adversarial Networks." 
} repositories: { @@ -10666,7 +10743,7 @@ pr_id_to_video: { video_id: "8PoewOpK6b4" video_title: "PR-125: ENERGY-BASED GENERATIVE ADVERSARIAL NETWORKS" number_of_likes: 7 - number_of_views: 712 + number_of_views: 724 published_date: { seconds: 1544368518 } @@ -10690,70 +10767,72 @@ pr_id_to_video: { authors: "Natalia Neverova" authors: "Iasonas Kokkinos" repositories: { - url: "https://github.com/ubc-vision/DwNet" - owner: "ubc-vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 + url: "https://github.com/facebookresearch/DensePose" + owner: "facebookresearch" + framework: FRAMEWORK_OTHERS + number_of_stars: 6084 + description: "A real-time approach for mapping all human pixels of 2D RGB images to a 3D surface-based model of the body" } repositories: { - url: "https://github.com/hz-ants/DensePose" - owner: "hz-ants" + url: "https://github.com/ARMUGHAN-SHAHID/MoboDensepose" + owner: "ARMUGHAN-SHAHID" framework: FRAMEWORK_OTHERS + description: "DEnse" } repositories: { - url: "https://github.com/yongsheng268/DensePose" - owner: "yongsheng268" - framework: FRAMEWORK_OTHERS + url: "https://github.com/facebookresearch/detectron" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24562 + description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." } repositories: { - url: "https://github.com/jiajunhua/facebookresearch-DensePose" - owner: "jiajunhua" + url: "https://github.com/svikramank/DensePose" + owner: "svikramank" framework: FRAMEWORK_OTHERS + number_of_stars: 5 + description: "In this repo, I tried replicating the famous Facebook's DensePose R-CNN model and tried to visualize the collected DensePose-COCO dataset and show the correspondences to the SMPL model." } repositories: { - url: "https://github.com/sgoldyaev/DeepFashion.ADGAN" - owner: "sgoldyaev" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 + url: "https://github.com/StupidmanTan/facebookresearch" + owner: "StupidmanTan" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/chengjiali/DensePose3" - owner: "chengjiali" + url: "https://github.com/M-Usman10/DenseSqueeze-RCNN" + owner: "M-Usman10" framework: FRAMEWORK_OTHERS - description: "Adapt FB's DensePose for Python3" + number_of_stars: 2 + description: "Optimized implementation of DensePose RCNN" } repositories: { - url: "https://github.com/facebookresearch/DensePose" - owner: "facebookresearch" + url: "https://github.com/lncarter/Dencepose" + owner: "lncarter" framework: FRAMEWORK_OTHERS - number_of_stars: 6049 - description: "A real-time approach for mapping all human pixels of 2D RGB images to a 3D surface-based model of the body" + number_of_stars: 1 } repositories: { - url: "https://github.com/ARMUGHAN-SHAHID/MoboDensepose" - owner: "ARMUGHAN-SHAHID" + url: "https://github.com/freedombenLiu/DensePose" + owner: "freedombenLiu" framework: FRAMEWORK_OTHERS - description: "DEnse" } repositories: { - url: "https://github.com/facebookresearch/detectron" - owner: "facebookresearch" + url: "https://github.com/chuanqichen/deepcoaching" + owner: "chuanqichen" framework: FRAMEWORK_PYTORCH - number_of_stars: 24504 - description: "FAIR's research platform for object detection research, implementing popular algorithms like Mask R-CNN and RetinaNet." 
+ number_of_stars: 3 + description: "Sports Coaching from Pose Estimation" } repositories: { - url: "https://github.com/svikramank/DensePose" - owner: "svikramank" + url: "https://github.com/jiajunhua/facebookresearch-Detectron" + owner: "jiajunhua" framework: FRAMEWORK_OTHERS - number_of_stars: 5 - description: "In this repo, I tried replicating the famous Facebook's DensePose R-CNN model and tried to visualize the collected DensePose-COCO dataset and show the correspondences to the SMPL model." } } video: { video_id: "-bvMCbk_FT8" video_title: "PR-126: DensePose: Dense Human Pose Estimation In The Wild" - number_of_views: 1796 + number_of_views: 1827 published_date: { seconds: 1544365241 } @@ -10777,77 +10856,75 @@ pr_id_to_video: { authors: "Dmitry Kalenichenko" authors: "James Philbin" repositories: { - url: "https://github.com/heorhii-bolotov/facenet" - owner: "heorhii-bolotov" + url: "https://github.com/altndrr/persona" + owner: "altndrr" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "Face recognition usage example" + number_of_stars: 1 + description: "[WIP] Research project on face recognition" } repositories: { - url: "https://github.com/SamarpanDas/Face-Recognition-Face-Verification" - owner: "SamarpanDas" + url: "https://github.com/mlpocprojects/facenet-poc" + owner: "mlpocprojects" framework: FRAMEWORK_TENSORFLOW - description: "I have implemented a Face Recognition & Face Verification model by One Shot Learning using the Triplet Loss function" + description: "Facenet POC using Inception Net for Google." } repositories: { - url: "https://github.com/sdamolini/LooksLikeWho" - owner: "sdamolini" + url: "https://github.com/soumik12345/Nearest-Celebrity-Face" + owner: "soumik12345" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "VGGFace2 Facial Recognition using Quadruplet Loss and 4 CNNs." + number_of_stars: 28 + description: "Tensorflow Implementation of FaceNet: A Unified Embedding for Face Recognition and Clustering to find the celebrity whose face matches the closest to yours." } repositories: { - url: "https://github.com/shi510/ffem" - owner: "shi510" + url: "https://github.com/ArturPrzybysz/MNIST-siamese" + owner: "ArturPrzybysz" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "Face Feature Embedding Module" - } - repositories: { - url: "https://github.com/obj2vec/obj2vec" - owner: "obj2vec" - framework: FRAMEWORK_OTHERS + number_of_stars: 6 } repositories: { - url: "https://github.com/akshayraghavan21/Face_Recognition_Using_Facenet" - owner: "akshayraghavan21" + url: "https://github.com/madhavambati/Face-Recognition" + owner: "madhavambati" framework: FRAMEWORK_TENSORFLOW - description: "A simple face recognition implementation using a pre-trained, one-shot learning model - FaceNet. Classification on custom dataset by using the WebCam to perform live face recognition." + number_of_stars: 13 + description: "Implementation of Face-recognition system using Inception Network and Siamese Network " } repositories: { - url: "https://github.com/tamerthamoqa/facenet-pytorch-glint360k" - owner: "tamerthamoqa" - framework: FRAMEWORK_PYTORCH - number_of_stars: 125 - description: "A PyTorch implementation of the 'FaceNet' paper for training a facial recognition model with Triplet Loss using the glint360k dataset. A pre-trained model using Triplet Loss is available for download." 
+ url: "https://github.com/BradNeuberg/personal-photos-model" + owner: "BradNeuberg" + framework: FRAMEWORK_OTHERS + number_of_stars: 24 + description: "A neural net trained over a person's personal photo collection to do face detection" } repositories: { - url: "https://github.com/Atcold/torch-TripletEmbedding" - owner: "Atcold" + url: "https://github.com/AmrTarekk/Attendence-system-using-Facial-Recognition-" + owner: "AmrTarekk" framework: FRAMEWORK_OTHERS - number_of_stars: 157 - description: "TripletLoss used in Google's FaceNet paper" } repositories: { - url: "https://github.com/serengil/deepface" - owner: "serengil" + url: "https://github.com/yehengchen/FaceRecognition" + owner: "yehengchen" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1853 - description: "DeepFace: A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" + number_of_stars: 11 + description: "SmartCar - Real-time SmartCar System (Driver/Passenger) which included Face ID Recognition and Head Pose Estimation" } repositories: { - url: "https://github.com/PushpakBhoge/Face_Recognition_TF" - owner: "PushpakBhoge" + url: "https://github.com/chenyeheng/SmartCar" + owner: "chenyeheng" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "A project to Recognise faces in photos and videos or in realtime" + number_of_stars: 11 + description: "SmartCar - Real-time SmartCar System (Driver/Passenger) which included Face ID Recognition and Head Pose Estimation" + } + repositories: { + url: "https://github.com/Anil1331/Facenet" + owner: "Anil1331" + framework: FRAMEWORK_OTHERS } } video: { video_id: "0k3X-9y_9S8" video_title: "PR-127: FaceNet" - number_of_likes: 61 - number_of_views: 4100 + number_of_likes: 64 + number_of_views: 4218 published_date: { seconds: 1544971153 } @@ -10885,7 +10962,7 @@ pr_id_to_video: { url: "https://github.com/huangsicong/TimbreTron" owner: "huangsicong" framework: FRAMEWORK_OTHERS - number_of_stars: 37 + number_of_stars: 38 description: "The repo accompanying the paper: TimbreTron: A WaveNet(CycleGAN(CQT(Audio))) Pipeline for Musical Timbre Transfer" } methods: { @@ -10908,7 +10985,7 @@ pr_id_to_video: { video_id: "5eofa6SksKU" video_title: "PR-128: TimbreTron: A Wavenet(CycleGAN(CQT(Audio))) pipeline for musical timbre transfer" number_of_likes: 6 - number_of_views: 550 + number_of_views: 553 published_date: { seconds: 1544973323 } @@ -10930,12 +11007,6 @@ pr_id_to_video: { } authors: "Alexander Sergeev" authors: "Mike Del Balso" - repositories: { - url: "https://github.com/hcyang99/horovod" - owner: "hcyang99" - framework: FRAMEWORK_TENSORFLOW - description: "Modify horovod/horovod to support nic switching" - } repositories: { url: "https://github.com/gridgentoo/UberHorovod" owner: "gridgentoo" @@ -10953,14 +11024,14 @@ pr_id_to_video: { url: "https://github.com/uber/horovod" owner: "uber" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11419 + number_of_stars: 11545 description: "Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." } repositories: { url: "https://github.com/horovod/horovod" owner: "horovod" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11419 + number_of_stars: 11545 description: "Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet." } repositories: { @@ -10996,12 +11067,18 @@ pr_id_to_video: { number_of_stars: 2 description: "Distributed training framework for TensorFlow, Keras, PyTorch, and MXNet. 
" } + repositories: { + url: "https://github.com/hcyang99/horovod" + owner: "hcyang99" + framework: FRAMEWORK_TENSORFLOW + description: "Modify horovod/horovod to support nic switching" + } } video: { video_id: "8zQECRiONAo" video_title: "PR-129: Horovod: fast and easy distributed deep learning in TensorFlow" - number_of_likes: 9 - number_of_views: 719 + number_of_likes: 10 + number_of_views: 738 published_date: { seconds: 1546077765 } @@ -11023,39 +11100,11 @@ pr_id_to_video: { } authors: "Jonathan Ho" authors: "Stefano Ermon" - repositories: { - url: "https://github.com/morikatron/GAIL_PPO" - owner: "morikatron" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Generative Adversarial Imitation Learning" - } - repositories: { - url: "https://github.com/HumanCompatibleAI/deep-rlsp" - owner: "HumanCompatibleAI" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 23 - description: "Code accompanying \"Learning What To Do by Simulating the Past\", ICLR 2021." - } - repositories: { - url: "https://github.com/HumanCompatibleAI/imitation" - owner: "HumanCompatibleAI" - framework: FRAMEWORK_PYTORCH - number_of_stars: 264 - description: "Clean PyTorch implementations of imitation learning algorithms" - } - repositories: { - url: "https://github.com/Khrylx/PyTorch-RL" - owner: "Khrylx" - framework: FRAMEWORK_PYTORCH - number_of_stars: 707 - description: "PyTorch implementation of Deep Reinforcement Learning: Policy Gradient methods (TRPO, PPO, A2C) and Generative Adversarial Imitation Learning (GAIL). Fast Fisher vector product TRPO." - } repositories: { url: "https://github.com/sisl/ngsim_env" owner: "sisl" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 102 + number_of_stars: 103 description: "Learning human driver models from NGSIM data with imitation learning." } repositories: { @@ -11081,16 +11130,44 @@ pr_id_to_video: { url: "https://github.com/hill-a/stable-baselines" owner: "hill-a" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3209 + number_of_stars: 3239 description: "A fork of OpenAI Baselines, implementations of reinforcement learning algorithms" } repositories: { url: "https://github.com/KAIST-AILab/deeprl_practice_colab" owner: "KAIST-AILab" framework: FRAMEWORK_OTHERS - number_of_stars: 4 + number_of_stars: 5 description: "Preparation for Deep Reinforcement Learning using Google Colab" } + repositories: { + url: "https://github.com/HumanCompatibleAI/airl" + owner: "HumanCompatibleAI" + framework: FRAMEWORK_PYTORCH + number_of_stars: 292 + description: "Clean PyTorch implementations of imitation learning algorithms" + } + repositories: { + url: "https://github.com/morikatron/GAIL_PPO" + owner: "morikatron" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Generative Adversarial Imitation Learning" + } + repositories: { + url: "https://github.com/Techget/gail-tf-sc2" + owner: "Techget" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Generative-Adversarial-Imitation-Learning on PySC2" + } + repositories: { + url: "https://github.com/Khrylx/PyTorch-RL" + owner: "Khrylx" + framework: FRAMEWORK_PYTORCH + number_of_stars: 718 + description: "PyTorch implementation of Deep Reinforcement Learning: Policy Gradient methods (TRPO, PPO, A2C) and Generative Adversarial Imitation Learning (GAIL). Fast Fisher vector product TRPO." 
+ } methods: { name: "GAIL" full_name: "Generative Adversarial Imitation Learning" @@ -11101,7 +11178,7 @@ pr_id_to_video: { video_id: "XHmRsgFrCTM" video_title: "PR-130: Generative Adversarial Imitation Learning" number_of_likes: 14 - number_of_views: 2578 + number_of_views: 2661 published_date: { seconds: 1545573404 } @@ -11125,73 +11202,74 @@ pr_id_to_video: { authors: "Samuli Laine" authors: "Timo Aila" repositories: { - url: "https://github.com/bennyqp/artificial-inspiration" - owner: "bennyqp" + url: "https://github.com/mokeam/StatueStyleGAN" + owner: "mokeam" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "\"Artificial Inspiration\" is an attempt to stimulate and enhance human creativity in a new way using artificial intelligence to achieve new and more creative results. " + number_of_stars: 6 + description: "This repository contains the tensorflow implementation of A Style-Based Generator Architecture for Generative Adversarial Networks applied on Statues." } repositories: { - url: "https://github.com/comp-imaging-sci/pic-recon" - owner: "comp-imaging-sci" + url: "https://github.com/delta6189/Anime-Sketch-Colorizer" + owner: "delta6189" + framework: FRAMEWORK_PYTORCH + number_of_stars: 16 + description: "Automatic Sketch Colorization with reference image" + } + repositories: { + url: "https://github.com/woctezuma/stylegan2-projecting-images" + owner: "woctezuma" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Code associated with the paper \"Prior Image-Constrained Reconstruction using Style-Based Generative Models\" accepted to ICML 2021." + number_of_stars: 169 + description: "Projecting images to latent space with StyleGAN2." } repositories: { - url: "https://github.com/toshas/torch-fidelity" - owner: "toshas" + url: "https://github.com/rosinality/style-based-gan-pytorch" + owner: "rosinality" framework: FRAMEWORK_PYTORCH - number_of_stars: 258 - description: "High-fidelity performance metrics for generative models in PyTorch" + number_of_stars: 856 + description: "Implementation A Style-Based Generator Architecture for Generative Adversarial Networks in PyTorch" } repositories: { - url: "https://github.com/roberttwomey/machine-imagination-workshop" - owner: "roberttwomey" + url: "https://github.com/RUTILEA/Chainer-StyleBasedGAN" + owner: "RUTILEA" framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" + number_of_stars: 2 + description: "Chainer implementation of Style-Based GAN" } repositories: { - url: "https://github.com/ariel415el/SimplePytorch-ALAE" - owner: "ariel415el" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of Adverserial autoencoders" + url: "https://github.com/ialhashim/StyleGAN-Tensorflow2" + owner: "ialhashim" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 26 + description: "StyleGAN - TensorFlow 2.0 implementation compatible with the official code" } repositories: { - url: "https://github.com/jhKessler/Progressively-Growing-Generative-Adverserial-Network" - owner: "jhKessler" - framework: FRAMEWORK_PYTORCH - description: "Generative Adverserial Network for Image Generation" + url: "https://github.com/manicman1999/StyleGAN-Keras" + owner: "manicman1999" + framework: FRAMEWORK_OTHERS + number_of_stars: 160 + description: "StyleGAN made with Keras" } repositories: { - url: "https://github.com/genforce/interfacegan" - owner: "genforce" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 
964 - description: "[CVPR 2020] Interpreting the Latent Space of GANs for Semantic Face Editing" + url: "https://github.com/pfnet-research/chainer-stylegan" + owner: "pfnet-research" + framework: FRAMEWORK_OTHERS + number_of_stars: 75 + description: "Chainer implementation of Style-based Generator" } repositories: { - url: "https://github.com/a514514772/hijackgan" - owner: "a514514772" + url: "https://github.com/itsuki8914/stylegan-TensorFlow" + owner: "itsuki8914" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 23 - description: "[CVPR 2021] Pytorch implementation of Hijack-GAN: Unintended-Use of Pretrained, Black-Box GANs" - } - repositories: { - url: "https://github.com/yaxingwang/DeepI2I" - owner: "yaxingwang" - framework: FRAMEWORK_PYTORCH - number_of_stars: 20 - description: "Image-to-image translation, knowledge transfer" + number_of_stars: 1 + description: "A implementation of stylegan using Tensorflow" } repositories: { - url: "https://github.com/ariel415el/ALAE" - owner: "ariel415el" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of Adverserial autoencoders" + url: "https://github.com/taki0112/StyleGAN-Tensorflow" + owner: "taki0112" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 201 + description: "Simple & Intuitive Tensorflow implementation of StyleGAN (CVPR 2019 Oral)" } methods: { name: "Feedforward Network" @@ -11242,8 +11320,8 @@ pr_id_to_video: { video: { video_id: "TWzEbMrH59o" video_title: "PR-131: A Style-Based Generator Architecture for Generative Adversarial Networks" - number_of_likes: 64 - number_of_views: 3914 + number_of_likes: 66 + number_of_views: 3978 published_date: { seconds: 1546903803 } @@ -11271,67 +11349,66 @@ pr_id_to_video: { authors: "Cheng-Yang Fu" authors: "Alexander C. Berg" repositories: { - url: "https://github.com/huytranvan2010/SSD" - owner: "huytranvan2010" + url: "https://github.com/ShivamPrajapati2001/People_Counter" + owner: "ShivamPrajapati2001" framework: FRAMEWORK_OTHERS number_of_stars: 1 + description: "This is Real Time People Counting using OpenCV" } repositories: { - url: "https://github.com/stevensmiley1989/MrRobot" - owner: "stevensmiley1989" + url: "https://github.com/toseek01/eyebird" + owner: "toseek01" framework: FRAMEWORK_TENSORFLOW - description: "This is a robot I designed in Fusion 360 and 3D printed with my FlashForge Creator Pro in PLA, Main Hardware: 1 x Raspberry Pi 3b, 3 x Arduinos with I2C, 5 x ultrasonic sensors, 4 x 60Kg Servos, 4 x 12V 200rpm DC motors, 1 x stepper motor for loading ammo into custom built coil gun. The coil gun uses 2 x 450V 1000uF Capacitors in parallel with a boost converter, yielding 380V maximum charge discharge from a 12V input, firing with a 1.2kV maximum peak non-repetitive surge current 1.1kA rated Thyristor SCR, Main Software: Uses TensorFlow and Python for Object Detection with some C++ for motor controls. The model used is a retrained Single Shot Detection MobileNet V2 algorithm trained on a toy reindeer. Signal processing allows proportional controller feedback to adjust movement of the robot for moving, aiming, and shooting. An application for IOS was written in Swift to control the robot as well, using Mosquito MQTT Broker for communication. " - } - repositories: { - url: "https://github.com/birosjh/pytorch_ssd" - owner: "birosjh" - framework: FRAMEWORK_PYTORCH - description: "A project for me to play around and experiment with the different components of the Single Shot Multibox Detector." 
+ description: "No data ...until" } repositories: { - url: "https://github.com/Chubbyman2/SSD_MobileNet_Hand_Tracker" - owner: "Chubbyman2" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/spencerkraisler/Finger-Counter" + owner: "spencerkraisler" + framework: FRAMEWORK_OTHERS number_of_stars: 1 - description: "A hand tracker created using OpenCV and a re-trained SSD MobileNet v2 via transfer learning on the EgoHands Dataset." + description: "I made two different models to count the number of fingers you're holding out." } repositories: { - url: "https://github.com/serengil/deepface" - owner: "serengil" + url: "https://github.com/KitaYoshihiro/sk" + owner: "KitaYoshihiro" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1853 - description: "DeepFace: A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" + number_of_stars: 1 } repositories: { - url: "https://github.com/AmirDavoodi/Hand-Gestures-Human-Robot-Interaction" - owner: "AmirDavoodi" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/guptarohit994/ECE285_Graduate_Descent_SSD" + owner: "guptarohit994" + framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "This project is the final project of the course Robotics 2019 and we are implementing hand gesture classifier to using it for controlling Mighty Thymio robot which is a differential robot." + description: "ECE285 SP19" } repositories: { - url: "https://github.com/bleedingfight/caffe-env" - owner: "bleedingfight" + url: "https://github.com/valentina-kustikova/dnn-object-detectors-comp" + owner: "valentina-kustikova" framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/nirajdevpandey/Object-detection-and-localization-using-SSD-" + owner: "nirajdevpandey" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 + description: "The repository contains project for object detection using \"single shot detection\" algorithm. " } repositories: { - url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" - owner: "KostadinovShalon" - framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + url: "https://github.com/Zoushuang86/quiz_w8" + owner: "Zoushuang86" + framework: FRAMEWORK_TENSORFLOW + description: "Use python and https://github.com/tensorflow/models/tree/r1.5 to train a model which comes from ssd and mobilenet." } repositories: { - url: "https://github.com/jaykshirsagar05/CrowdCounting" - owner: "jaykshirsagar05" - framework: FRAMEWORK_OTHERS + url: "https://github.com/EricYang3721/faces" + owner: "EricYang3721" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 } repositories: { - url: "https://github.com/ashwath007/amenity-detection" - owner: "ashwath007" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "aminity-detection" + url: "https://github.com/tchernitski/caffe.deploy" + owner: "tchernitski" + framework: FRAMEWORK_OTHERS } methods: { name: "SGD with Momentum" @@ -11388,7 +11465,7 @@ pr_id_to_video: { video_id: "ej1ISEoAK5g" video_title: "PR-132: SSD: Single Shot MultiBox Detector" number_of_likes: 123 - number_of_views: 10552 + number_of_views: 10738 published_date: { seconds: 1546786878 } @@ -11418,66 +11495,66 @@ pr_id_to_video: { authors: "Yangqing Jia" authors: "Kaiming He" repositories: { - url: "https://github.com/luminxu/ViPNAS" - owner: "luminxu" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "The official repo for CVPR2021——ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search." 
+ url: "https://github.com/JunnYu/paddle_convbert" + owner: "JunnYu" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 } repositories: { - url: "https://github.com/nerminsamet/HPRNet" - owner: "nerminsamet" + url: "https://github.com/d-li14/PSConv" + owner: "d-li14" framework: FRAMEWORK_PYTORCH - number_of_stars: 19 + number_of_stars: 150 + description: "[ECCV 2020] PSConv: Squeezing Feature Pyramid into One Compact Poly-Scale Convolutional Layer" } repositories: { - url: "https://github.com/IVRL/FG-NIC" - owner: "IVRL" + url: "https://github.com/apoorvagnihotri/big-little-net" + owner: "apoorvagnihotri" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Fidelity-Guided Noisy Image Classification" + number_of_stars: 8 + description: "A CNN Architecture that makes use of multi-scale features for Object Recognition." } repositories: { - url: "https://github.com/vycezhong/byteps-compress" - owner: "vycezhong" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/tensorpack/benchmarks" + owner: "tensorpack" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 87 + description: "Use TensorFlow efficiently" } repositories: { - url: "https://github.com/YeLyuUT/VOSDetectron" - owner: "YeLyuUT" + url: "https://github.com/k0pch4/big-little-net" + owner: "k0pch4" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Combination of Mask RCNN with ConvGRU for video object segmentation" + number_of_stars: 8 + description: "A CNN Architecture that makes use of multi-scale features for Object Recognition." } repositories: { - url: "https://github.com/MarcAntoineAlex/darts" - owner: "MarcAntoineAlex" + url: "https://github.com/alldbi/SuperMix" + owner: "alldbi" framework: FRAMEWORK_PYTORCH + number_of_stars: 57 + description: "Pytorch implementation of CVPR2021 paper: SuperMix: Supervising the Mixing Data Augmentation" } repositories: { - url: "https://github.com/kikacaty/adv_guide" - owner: "kikacaty" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/jiajunhua/facebookresearch-Detectron" + owner: "jiajunhua" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/HRNet/Lite-HRNet" - owner: "HRNet" + url: "https://github.com/abcp4/MyDarts" + owner: "abcp4" framework: FRAMEWORK_PYTORCH - number_of_stars: 359 - description: "This is an official pytorch implementation of Lite-HRNet: A Lightweight High-Resolution Network. 
" } repositories: { - url: "https://github.com/serend1p1ty/SeqNet" - owner: "serend1p1ty" - framework: FRAMEWORK_PYTORCH - number_of_stars: 121 - description: "Code for AAAI 2021 paper: Sequential End-to-end Network for Efficient Person Search" + url: "https://github.com/lduml/blog" + owner: "lduml" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/ericyang789/Parallel-Compute-Project" - owner: "ericyang789" - framework: FRAMEWORK_OTHERS - description: "C implementation of t-SNE with parallelization optimization" + url: "https://github.com/darkreapyre/HaaS" + owner: "darkreapyre" + framework: FRAMEWORK_TENSORFLOW + description: "Proof of Concept for Horovod-as-a-Service" } methods: { name: "SGD" @@ -11488,8 +11565,8 @@ pr_id_to_video: { video: { video_id: "g3McZgloCJo" video_title: "PR-133: Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" - number_of_likes: 9 - number_of_views: 663 + number_of_likes: 10 + number_of_views: 677 published_date: { seconds: 1547454308 } @@ -11513,17 +11590,6 @@ pr_id_to_video: { authors: "Dimitris Tsipras" authors: "Andrew Ilyas" authors: "Aleksander Madry" - repositories: { - url: "https://github.com/yaoshiang/MobileNetV2-CIFAR-Cleverhans" - owner: "yaoshiang" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/AchintyaX/Brain_tumor_segmentation" - owner: "AchintyaX" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } repositories: { url: "https://github.com/utsawk/CarND-Traffic-Sign-Classifier-Project" owner: "utsawk" @@ -11552,12 +11618,23 @@ pr_id_to_video: { owner: "tobinthankachan1" framework: FRAMEWORK_OTHERS } + repositories: { + url: "https://github.com/AchintyaX/Brain_tumor_segmentation" + owner: "AchintyaX" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/yaoshiang/MobileNetV2-CIFAR-Cleverhans" + owner: "yaoshiang" + framework: FRAMEWORK_OTHERS + } } video: { video_id: "hiN0IMM50FM" video_title: "PR-134 How Does Batch Normalization Help Optimization?" number_of_likes: 14 - number_of_views: 1012 + number_of_views: 1035 published_date: { seconds: 1548117640 } @@ -11585,7 +11662,7 @@ pr_id_to_video: { video_id: "LSlBoNNbULg" video_title: "PR-135: Photo Wake-Up: 3D Character Animation from a Single Photo" number_of_likes: 55 - number_of_views: 2989 + number_of_views: 3018 published_date: { seconds: 1548003936 } @@ -11610,14 +11687,6 @@ pr_id_to_video: { authors: "Marvin Ritter" authors: "Mario Lucic" authors: "Neil Houlsby" - repositories: { - is_official: true - url: "https://github.com/google/compare_gan" - owner: "google" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1736 - description: "Compare GAN code." - } repositories: { url: "https://github.com/zhangqianhui/Self-Supervised-GANs" owner: "zhangqianhui" @@ -11629,9 +11698,17 @@ pr_id_to_video: { url: "https://github.com/vandit15/Self-Supervised-Gans-Pytorch" owner: "vandit15" framework: FRAMEWORK_PYTORCH - number_of_stars: 50 + number_of_stars: 51 description: "Ready to train Pytorch implementation of the CVPR'19 paper \"Self-Supervised GANs via Auxiliary Rotation Loss\"" } + repositories: { + is_official: true + url: "https://github.com/google/compare_gan" + owner: "google" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1738 + description: "Compare GAN code." 
+ } methods: { name: "GAN" full_name: "Generative Adversarial Network" @@ -11646,8 +11723,8 @@ pr_id_to_video: { video: { video_id: "_wpDP-6afM4" video_title: "PR-136 Self-Supervised Generative Adversarial Networks" - number_of_likes: 25 - number_of_views: 1092 + number_of_likes: 27 + number_of_views: 1105 published_date: { seconds: 1547995361 } @@ -11674,28 +11751,11 @@ pr_id_to_video: { authors: "Yoshua Bengio" authors: "Aaron Courville" authors: "R Devon Hjelm" - repositories: { - url: "https://github.com/ahujak/KKLE" - owner: "ahujak" - framework: FRAMEWORK_OTHERS - description: "Estimating KL Divergence" - } - repositories: { - url: "https://github.com/sambklein/MINE_demo" - owner: "sambklein" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/gtegner/hyper-gan" - owner: "gtegner" - framework: FRAMEWORK_PYTORCH - description: "Uncertainty Estimation with HyperGANS in PyTorch!" - } repositories: { url: "https://github.com/MasanoriYamada/Mine_pytorch" owner: "MasanoriYamada" framework: FRAMEWORK_PYTORCH - number_of_stars: 156 + number_of_stars: 158 description: "MINE: Mutual Information Neural Estimation in pytorch (unofficial)" } repositories: { @@ -11720,7 +11780,7 @@ pr_id_to_video: { url: "https://github.com/dizcza/EmbedderSDR" owner: "dizcza" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 7 description: "Embedder with binary sparse distributed representation." } repositories: { @@ -11734,9 +11794,29 @@ pr_id_to_video: { url: "https://github.com/ChengzhangZhu/MINE" owner: "ChengzhangZhu" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 + number_of_stars: 15 description: "Keras implementation (only for tensorflow backend) of MINE: Mutual Information Neural Estimation" } + repositories: { + url: "https://github.com/mboudiaf/Mutual-Information-Variational-Bounds" + owner: "mboudiaf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 26 + description: "A Tensorflow implementation Mutual Information estimation methods" + } + repositories: { + url: "https://github.com/gtegner/mine-pytorch" + owner: "gtegner" + framework: FRAMEWORK_PYTORCH + number_of_stars: 59 + description: "Mutual Information Neural Estimation in Pytorch" + } + repositories: { + url: "https://github.com/burklight/MINE-PyTorch" + owner: "burklight" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } } } } @@ -11761,14 +11841,14 @@ pr_id_to_video: { url: "https://github.com/lmb-freiburg/Multimodal-Future-Prediction" owner: "lmb-freiburg" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 29 + number_of_stars: 30 description: "The official repository for the CVPR 2019 paper \"Overcoming Limitations of Mixture Density Networks: A Sampling and Fitting Framework for Multimodal Future Prediction\"" } } video: { video_id: "VORJQQUphuw" video_title: "PR-138: Mixture Density Network" - number_of_views: 2437 + number_of_views: 2549 published_date: { seconds: 1548599784 } @@ -11793,17 +11873,6 @@ pr_id_to_video: { authors: "João F. Henriques" authors: "Andrea Vedaldi" authors: "Philip H. S. 
Torr" - repositories: { - url: "https://github.com/logiklesuraj/siamfcex" - owner: "logiklesuraj" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/logiklesuraj/SiamFC" - owner: "logiklesuraj" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/suraj-maniyar/Object-Tracking-SSD300" owner: "suraj-maniyar" @@ -11821,9 +11890,20 @@ pr_id_to_video: { url: "https://github.com/shallowtoil/DROL" owner: "shallowtoil" framework: FRAMEWORK_PYTORCH - number_of_stars: 58 + number_of_stars: 59 description: "Discriminative and Robust Online Learning for Siamese Visual Tracking [AAAI2020]" } + repositories: { + url: "https://github.com/logiklesuraj/SiamFC" + owner: "logiklesuraj" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/logiklesuraj/siamfcex" + owner: "logiklesuraj" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } methods: { name: "Siamese Network" full_name: "Siamese Network" @@ -11833,8 +11913,8 @@ pr_id_to_video: { video: { video_id: "dv5yUl6Lw1g" video_title: "PR-139: Fully Convolutional Siamese Networks for Object Tracking" - number_of_likes: 39 - number_of_views: 2917 + number_of_likes: 43 + number_of_views: 3003 published_date: { seconds: 1549845265 } @@ -11862,7 +11942,7 @@ pr_id_to_video: { video_id: "_2l2UFIF08Q" video_title: "PR-140: Training Set Debugging Using Trusted Items" number_of_likes: 5 - number_of_views: 613 + number_of_views: 619 published_date: { seconds: 1549810486 } @@ -11889,14 +11969,6 @@ pr_id_to_video: { authors: "Wei Hua" authors: "Alan Yuille" authors: "Li Fei-Fei" - repositories: { - is_official: true - url: "https://github.com/tensorflow/models" - owner: "tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70579 - description: "Models and examples built with TensorFlow" - } repositories: { url: "https://github.com/Dawars/auto_deeplab-pytorch" owner: "Dawars" @@ -11908,16 +11980,24 @@ pr_id_to_video: { url: "https://github.com/MenghaoGuo/AutoDeeplab" owner: "MenghaoGuo" framework: FRAMEWORK_PYTORCH - number_of_stars: 381 + number_of_stars: 384 description: "Pytorch Implementation the paper Auto-DeepLab Hierarchical Neural Architecture Search for Semantic Image Segmentation" } repositories: { url: "https://github.com/NoamRosenberg/autodeeplab" owner: "NoamRosenberg" framework: FRAMEWORK_PYTORCH - number_of_stars: 271 + number_of_stars: 272 description: "AutoDeeplab / auto-deeplab / AutoML for semantic segmentation, implemented in Pytorch" } + repositories: { + is_official: true + url: "https://github.com/tensorflow/models" + owner: "tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70934 + description: "Models and examples built with TensorFlow" + } methods: { name: "Tanh Activation" full_name: "Tanh Activation" @@ -11943,7 +12023,7 @@ pr_id_to_video: { video_id: "ltlhQXHGzgE" video_title: "PR-141: Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation" number_of_likes: 19 - number_of_views: 1704 + number_of_views: 1738 published_date: { seconds: 1550413961 } @@ -11967,69 +12047,66 @@ pr_id_to_video: { authors: "Soumith Chintala" authors: "Léon Bottou" repositories: { - url: "https://github.com/Sinestro38/qosf-qgan" - owner: "Sinestro38" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Exploring learnability and optimal hyperparameters of various quantum generative adversarial networks and quantum neural networks using Pennylane. 
" - } - repositories: { - url: "https://github.com/ChristophReich1996/Dirac-GAN" - owner: "ChristophReich1996" + url: "https://github.com/thstkdgus35/EDSR-PyTorch" + owner: "thstkdgus35" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "PyTorch reimplementation of the DiracGAN proposed in the paper \"Which Training Methods for GANs do actually Converge?\" [ICML 2018]." + number_of_stars: 1702 + description: "PyTorch version of the paper 'Enhanced Deep Residual Networks for Single Image Super-Resolution' (CVPRW 2017) " } repositories: { - url: "https://github.com/bhargavajs07/Packed-Wasserstein-GAN-with-GradientPenalty-Example" - owner: "bhargavajs07" + url: "https://github.com/karl-hajjar/Generative-Adversarial-Networks" + owner: "karl-hajjar" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + description: "This repository contains the report and code of a project on Generative Adversarial Networks carried out with a friend during my Master's year. " } repositories: { - url: "https://github.com/VitoRazor/Gan_Architecture" - owner: "VitoRazor" + url: "https://github.com/lilianweng/unified-gan-tensorflow" + owner: "lilianweng" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + number_of_stars: 94 + description: "A Tensorflow implementation of GAN, WGAN and WGAN with gradient penalty." } repositories: { - url: "https://github.com/sanghyun-son/EDSR-PyTorch" - owner: "sanghyun-son" + url: "https://github.com/SimoneDutto/EDSR" + owner: "SimoneDutto" framework: FRAMEWORK_PYTORCH - number_of_stars: 1679 - description: "PyTorch version of the paper 'Enhanced Deep Residual Networks for Single Image Super-Resolution' (CVPRW 2017) " } repositories: { - url: "https://github.com/shekkizh/WassersteinGAN.tensorflow" - owner: "shekkizh" + url: "https://github.com/bhargavajs07/Packed_WGAN_GP_Example" + owner: "bhargavajs07" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/lunz-s/DeepAdverserialRegulariser" + owner: "lunz-s" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 419 - description: "Tensorflow implementation of Wasserstein GAN - arxiv: https://arxiv.org/abs/1701.07875" + number_of_stars: 13 } repositories: { - url: "https://github.com/lab-ml/annotated_deep_learning_paper_implementations/tree/master/labml_nn/gan/wasserstein" - owner: "gan" + url: "https://github.com/NeuralVFX/wasserstein-gan" + owner: "NeuralVFX" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + description: "Implementation of Wasserstein GAN in PyTorch" } repositories: { - url: "https://github.com/Ars235/Novelty_Detection" - owner: "Ars235" + url: "https://github.com/charlescheng0117/pytorch-WGAN-GP-TTUR-CelebA" + owner: "charlescheng0117" framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of Adversarially Learned One-Class Classifier for Novelty Detection" + number_of_stars: 3 } repositories: { - url: "https://github.com/ChristophReich1996/Mode_Collapse" - owner: "ChristophReich1996" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Mode collapse example of GANs in 2D (PyTorch)." 
+ url: "https://github.com/chiqunz/Unsupervised_Models" + owner: "chiqunz" + framework: FRAMEWORK_TENSORFLOW + description: "Unsupervised Learning Engines" } repositories: { - url: "https://github.com/rkem1542/EDSR-pytorch" - owner: "rkem1542" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/dabsdamoon/Anime-Colorization" + owner: "dabsdamoon" + framework: FRAMEWORK_OTHERS + number_of_stars: 11 + description: "I'm going to try coloring anime characters' gray image by using different algorithms" } methods: { name: "WGAN" @@ -12045,8 +12122,8 @@ pr_id_to_video: { video: { video_id: "tKQwlf-DAl0" video_title: "PR-142: Wasserstein GAN" - number_of_likes: 34 - number_of_views: 2031 + number_of_likes: 37 + number_of_views: 2108 published_date: { seconds: 1550412193 } @@ -12072,7 +12149,7 @@ pr_id_to_video: { video: { video_id: "APjGjwBR6o8" video_title: "PR-143: Recurrent World Models Facilitate Policy Evolution" - number_of_likes: 8 + number_of_likes: 7 number_of_views: 559 published_date: { seconds: 1551026446 @@ -12101,13 +12178,6 @@ pr_id_to_video: { authors: "Peter Jin" authors: "Sicheng Zhao" authors: "Kurt Keutzer" - repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" - framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" - } repositories: { url: "https://github.com/luuuyi/SqueezeNext.PyTorch" owner: "luuuyi" @@ -12119,7 +12189,7 @@ pr_id_to_video: { url: "https://github.com/Timen/squeezenext-tensorflow" owner: "Timen" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 54 + number_of_stars: 53 description: "A tensorflow implementation of squeezenext. (includes link to trained model)" } repositories: { @@ -12131,7 +12201,14 @@ pr_id_to_video: { url: "https://github.com/amirgholami/SqueezeNext" owner: "amirgholami" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 105 + number_of_stars: 104 + } + repositories: { + url: "https://github.com/osmr/imgclsmob" + owner: "osmr" + framework: FRAMEWORK_OTHERS + number_of_stars: 2268 + description: "Sandbox for training deep learning networks" } methods: { name: "Softmax" @@ -12187,8 +12264,8 @@ pr_id_to_video: { video: { video_id: "WReWeADJ3Pw" video_title: "PR-144: SqueezeNext: Hardware-Aware Neural Network Design" - number_of_likes: 34 - number_of_views: 2035 + number_of_likes: 36 + number_of_views: 2065 published_date: { seconds: 1551018415 } @@ -12264,7 +12341,7 @@ pr_id_to_video: { video_id: "f5zULULWUwM" video_title: "PR-145: Language Models are Unsupervised Multitask Learners (OpenAI GPT-2)" number_of_likes: 13 - number_of_views: 971 + number_of_views: 984 published_date: { seconds: 1552226192 } @@ -12287,11 +12364,11 @@ pr_id_to_video: { authors: "Hei Law" authors: "Jia Deng" repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" + is_official: true + url: "https://github.com/princeton-vl/CornerNet" + owner: "princeton-vl" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" + number_of_stars: 2227 } repositories: { url: "https://github.com/egeonat/MS-CornerNet" @@ -12301,17 +12378,34 @@ pr_id_to_video: { description: "An extension of the CornerNet architecture for RGB+T image inputs" } repositories: { - is_official: true - url: "https://github.com/princeton-vl/CornerNet" - owner: "princeton-vl" + url: "https://github.com/open-mmlab/mmdetection" + owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 2218 + number_of_stars: 16041 + description: 
"OpenMMLab Detection Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/gau-nernst/CenterNet" + owner: "gau-nernst" + framework: FRAMEWORK_PYTORCH + number_of_stars: 26 + description: "Implementation of CenterNet and FairMOT with PyTorch Lightning" + } + methods: { + name: "Random Horizontal Flip" + full_name: "Random Horizontal Flip" + description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" } methods: { name: "Hourglass Module" full_name: "Hourglass Module" description: "An **Hourglass Module** is an image block module used mainly for pose estimation tasks. The design of the hourglass is motivated by the need to capture information at every scale. While local evidence is essential for identifying features like faces and hands, a final pose estimate requires a coherent understanding of the full body. The person’s orientation, the arrangement of their limbs, and the relationships of adjacent joints are among the many cues that are best recognized at different scales in the image. The hourglass is a simple, minimal design that has the capacity to capture all of these features and bring them together to output pixel-wise predictions.\r\n\r\nThe network must have some mechanism to effectively process and consolidate features across scales. The Hourglass uses a single pipeline with skip layers to preserve spatial information at each resolution. The network reaches its lowest resolution at 4x4 pixels allowing smaller spatial filters to be applied that compare features across the entire space of the image.\r\n\r\nThe hourglass is set up as follows: Convolutional and max pooling layers are used to process features down to a very low resolution. At each max pooling step, the network branches off and applies more convolutions at the original pre-pooled resolution. After reaching the lowest resolution, the network begins the top-down sequence of upsampling and combination of features across scales. To bring together information across two adjacent resolutions, we do nearest neighbor upsampling of the lower resolution followed by an elementwise addition of the two sets of features. The topology of the hourglass is symmetric, so for every layer present on the way down there is a corresponding layer going up.\r\n\r\nAfter reaching the output resolution of the network, two consecutive rounds of 1x1 convolutions are applied to produce the final network predictions. The output of the network is a set of heatmaps where for a given heatmap the network predicts the probability of a joint’s presence at each and every pixel." } + methods: { + name: "CornerNet" + full_name: "CornerNet" + description: "**CornerNet** is an object detection model that detects an object bounding box as a pair of keypoints, the top-left corner and the bottom-right corner, using a single convolution neural network. By detecting objects as paired keypoints, we eliminate the need for designing a set of anchor boxes commonly used in prior single-stage detectors. It also utilises corner pooling, a new type of pooling layer than helps the network better localize corners." 
+ } methods: { name: "Stacked Hourglass Network" full_name: "Stacked Hourglass Network" @@ -12327,26 +12421,6 @@ pr_id_to_video: { full_name: "Residual Connection" description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "CornerNet" - full_name: "CornerNet" - description: "**CornerNet** is an object detection model that detects an object bounding box as a pair of keypoints, the top-left corner and the bottom-right corner, using a single convolution neural network. By detecting objects as paired keypoints, we eliminate the need for designing a set of anchor boxes commonly used in prior single-stage detectors. It also utilises corner pooling, a new type of pooling layer than helps the network better localize corners." - } methods: { name: "Corner Pooling" full_name: "Corner Pooling" @@ -12357,12 +12431,22 @@ pr_id_to_video: { full_name: "Non Maximum Suppression" description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. 
With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Focal Loss" + full_name: "Focal Loss" + description: "A **Focal Loss** function addresses class imbalance during training in tasks like object detection. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. It is a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. 
} video: { video_id: "6OYmOtivQY8" video_title: "PR-146: CornerNet: Detecting Objects as Paired Keypoints" number_of_likes: 24 - number_of_views: 1904 + number_of_views: 1948 published_date: { seconds: 1570081370 } @@ -12390,7 +12474,7 @@ pr_id_to_video: { video_id: "7lyxexSjshc" video_title: "PR-147: Learning Deep Structure-Preserving Image-Text Embeddings" number_of_likes: 20 - number_of_views: 578 + number_of_views: 579 published_date: { seconds: 1552667121 } @@ -12437,7 +12521,7 @@ pr_id_to_video: { video_id: "TgJuUxtLO3s" video_title: "PR-148 deep anomaly detection using geometric transformations" number_of_likes: 25 - number_of_views: 1644 + number_of_views: 1668 published_date: { seconds: 1552831505 } @@ -12461,36 +12545,39 @@ pr_id_to_video: { authors: "Alexandre Alahi" authors: "Li Fei-Fei" repositories: { - url: "https://github.com/jayChung0302/DeepFilter" - owner: "jayChung0302" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "make cool image filters for SNS applications" + url: "https://github.com/milmor/perceptual-losses-neural-style" + owner: "milmor" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "Perceptual Losses for Real-Time Style Transfer and Super-Resolution Tensorflow 2 implementation" } repositories: { - url: "https://github.com/Josien94/MLiP" - owner: "Josien94" + url: "https://github.com/anujdutt9/Artistic-Style-Transfer-using-Keras-Tensorflow" + owner: "anujdutt9" framework: FRAMEWORK_TENSORFLOW - description: "This repository contains code and supplementary material for participated Kaggle Challenges." + number_of_stars: 22 + description: "Art to Image Style Transfer using Keras and Tensorflow." } repositories: { - url: "https://github.com/Arthur-ZHAO-001/Fast-style-transfer" - owner: "Arthur-ZHAO-001" + url: "https://github.com/harunshimanto/Neural-Style-Transfer-of-Artistic-Style" + owner: "harunshimanto" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + description: "Art to Image Style Transfer using Keras " } repositories: { - url: "https://github.com/milmor/perceptual-losses-neural-style" - owner: "milmor" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Perceptual Losses for Real-Time Style Transfer and Super-Resolution Tensorflow 2 implementation" + url: "https://github.com/DmitryUlyanov/texture_nets" + owner: "DmitryUlyanov" + framework: FRAMEWORK_OTHERS + number_of_stars: 1175 + description: "Code for \"Texture Networks: Feed-forward Synthesis of Textures and Stylized Images\" paper." } repositories: { - url: "https://github.com/vijishmadhavan/SkinDeep" - owner: "vijishmadhavan" - framework: FRAMEWORK_PYTORCH - number_of_stars: 707 - description: "Get Deinked!!" + url: "https://github.com/ryanchankh/style_transfer" + owner: "ryanchankh" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "Implementation of Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. 2016.
“Image Style Transfer Using Convolutional Neural Networks.”" } repositories: { url: "https://github.com/back8/github_vijishmadhavan_ArtLine" @@ -12499,36 +12586,35 @@ pr_id_to_video: { number_of_stars: 1 } repositories: { - url: "https://github.com/kynk94/TF2-Image-Generation" - owner: "kynk94" + url: "https://github.com/thatbrguy/Dehaze-GAN" + owner: "thatbrguy" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Tensorflow2 reimplementation of image generation model (GAN, Style Transfer, Image to Image Translation, etc)" + number_of_stars: 69 + description: "TensorFlow code for Single Image Haze Removal using a Generative Adversarial Network" } repositories: { - url: "https://github.com/rrrepsac/tb_vc" - owner: "rrrepsac" + url: "https://github.com/Kumara-Kaushik/proba_v_challenge" + owner: "Kumara-Kaushik" + framework: FRAMEWORK_OTHERS + description: "This repository contains My attempt at he proba-v challenge." + } + repositories: { + url: "https://github.com/noufali/VideoML" + owner: "noufali" framework: FRAMEWORK_PYTORCH - description: "telebot" } repositories: { url: "https://github.com/vijishmadhavan/Toon-Me" owner: "vijishmadhavan" framework: FRAMEWORK_PYTORCH - number_of_stars: 318 + number_of_stars: 326 description: "A Deep Learning project to Toon Portrait Images" } - repositories: { - url: "https://github.com/WalterJohnson0/tf-keras-implementation-of-Image-Style-transformation-network" - owner: "WalterJohnson0" - framework: FRAMEWORK_TENSORFLOW - description: "Computer Vision Final Project- implementation of Neural style transfer" - } } video: { video_id: "OKDaGzeUz4U" video_title: "PR-149: Perceptual Losses for Real-Time Style Transfer and Super-Resolution" - number_of_views: 2162 + number_of_views: 2223 published_date: { seconds: 1552832996 } @@ -12554,11 +12640,25 @@ pr_id_to_video: { authors: "Matthias Bethge" authors: "Felix A. Wichmann" authors: "Wieland Brendel" + repositories: { + url: "https://github.com/mbuet2ner/local-global-features-cnn" + owner: "mbuet2ner" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Code for my Master's Thesis: \"The Role of Local Versus Global Features in Convolutional Neural Networks\"" + } + repositories: { + url: "https://github.com/LiYingwei/ShapeTextureDebiasedTraining" + owner: "LiYingwei" + framework: FRAMEWORK_PYTORCH + number_of_stars: 83 + description: "Code and models for the paper Shape-Texture Debiased Neural Network Training (ICLR 2021)" + } repositories: { url: "https://github.com/facebookresearch/augmentation-corruption" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 13 description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." 
} repositories: { @@ -12568,27 +12668,20 @@ pr_id_to_video: { description: "Breast Cancer biopsy image analysis using CNN" } repositories: { - url: "https://github.com/LiYingwei/ShapeTextureDebiasedTraining" - owner: "LiYingwei" + is_official: true + url: "https://github.com/rgeirhos/Stylized-ImageNet" + owner: "rgeirhos" framework: FRAMEWORK_PYTORCH - number_of_stars: 74 - description: "Code and models for the paper Shape-Texture Debiased Neural Network Training (ICLR 2021)" + number_of_stars: 379 + description: "Code to create Stylized-ImageNet, a stylized version of standard ImageNet (ICLR 2019 Oral)" } repositories: { - is_official: true url: "https://github.com/rgeirhos/texture-vs-shape" owner: "rgeirhos" framework: FRAMEWORK_PYTORCH - number_of_stars: 603 + number_of_stars: 611 description: "Pre-trained models, data, code & materials from the paper \"ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness\" (ICLR 2019 Oral)" } - repositories: { - url: "https://github.com/mbuet2ner/local-global-features-cnn" - owner: "mbuet2ner" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Code for my Master's Thesis: \"The Role of Local Versus Global Features in Convolutional Neural Networks\"" - } methods: { name: "Global Average Pooling" full_name: "Global Average Pooling" @@ -12643,8 +12736,8 @@ pr_id_to_video: { video: { video_id: "oBapZTL8LsE" video_title: "PR-150: ImageNet-trained CNNs are Biased Towards Textures" - number_of_likes: 17 - number_of_views: 1122 + number_of_likes: 19 + number_of_views: 1137 published_date: { seconds: 1553435404 } @@ -12670,60 +12763,69 @@ pr_id_to_video: { authors: "Eli Shechtman" authors: "Oliver Wang" repositories: { - url: "https://github.com/tding1/CDFI" - owner: "tding1" + is_official: true + url: "https://github.com/richzhang/PerceptualSimilarity" + owner: "richzhang" framework: FRAMEWORK_PYTORCH - number_of_stars: 63 - description: "Code of paper \"CDFI: Compression-Driven Network Design for Frame Interpolation\", CVPR 2021" + number_of_stars: 1802 + description: "LPIPS metric. pip install lpips" } repositories: { - url: "https://github.com/RudreshVeerkhare/StyleGan" - owner: "RudreshVeerkhare" + url: "https://github.com/woctezuma/stylegan2-projecting-images" + owner: "woctezuma" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + number_of_stars: 169 + description: "Projecting images to latent space with StyleGAN2." } repositories: { - url: "https://github.com/cassava-math-ubb/experiments" - owner: "cassava-math-ubb" + url: "https://github.com/Puzer/stylegan-encoder" + owner: "Puzer" framework: FRAMEWORK_TENSORFLOW - description: "This repo contains our experimental approaches. " + number_of_stars: 930 + description: "StyleGAN Encoder - converts real images to latent space" } repositories: { - url: "https://github.com/ak9250/stylegan-art" - owner: "ak9250" + url: "https://github.com/isaacschaal/SG_training" + owner: "isaacschaal" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 346 - description: "train stylegan through transfer learning" } repositories: { - url: "https://github.com/ayushgupta9198/stylegan" - owner: "ayushgupta9198" - framework: FRAMEWORK_TENSORFLOW - description: "The model is based for fake person creation based on stylegan technique. 
I have trained the model on my data set and generates the result basis of grids and seeds" + url: "https://github.com/EndyWon/Deep-Feature-Perturbation" + owner: "EndyWon" + framework: FRAMEWORK_OTHERS + number_of_stars: 27 + description: "Code and data of “Diversified Arbitrary Style Transfer via Deep Feature Perturbation” (CVPR 2020)" } repositories: { - url: "https://github.com/isaacschaal/SG_training" - owner: "isaacschaal" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/kozistr/gan-metrics" + owner: "kozistr" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Lots of evaluation metrics for the generative adversarial networks in pytorch" } repositories: { - url: "https://github.com/stefkim/stylegan-batik" - owner: "stefkim" + url: "https://github.com/ashutosh1919/FaceGenerationStyleGAN" + owner: "ashutosh1919" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "The objective of this project is to generate end to end Face Generation System using StyleGAN." } repositories: { - url: "https://github.com/MrWednes/CopyNVlab" - owner: "MrWednes" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/S-aiueo32/lpips-pytorch" + owner: "S-aiueo32" + framework: FRAMEWORK_PYTORCH + number_of_stars: 77 + description: "A simple and useful implementation of LPIPS." } repositories: { - url: "https://github.com/khurram702/StyleBasedGAN" - owner: "khurram702" + url: "https://github.com/pbaylies/stylegan-encoder" + owner: "pbaylies" framework: FRAMEWORK_TENSORFLOW - description: "Style Base Architecture of Generator" + number_of_stars: 623 + description: "StyleGAN Encoder - converts real images to latent space" } repositories: { - url: "https://github.com/ayushgupta9198/gan" + url: "https://github.com/ayushgupta9198/stylegan" owner: "ayushgupta9198" framework: FRAMEWORK_TENSORFLOW description: "The model is based for fake person creation based on stylegan technique. I have trained the model on my data set and generates the result basis of grids and seeds" @@ -12768,7 +12870,7 @@ pr_id_to_video: { video_id: "VDeJFb5jt5M" video_title: "PR-151: The Unreasonable Effectiveness of Deep Features as a Perceptual Metric" number_of_likes: 7 - number_of_views: 727 + number_of_views: 743 published_date: { seconds: 1553438571 } @@ -12795,70 +12897,75 @@ pr_id_to_video: { authors: "Sunghun Kim" authors: "Jaegul Choo" repositories: { - url: "https://github.com/flystarhe/dicom-enhance" - owner: "flystarhe" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "dicom enhance" + url: "https://github.com/noah-rush/Portrait-Genre-GANs" + owner: "noah-rush" + framework: FRAMEWORK_OTHERS + description: "Using Generative Adversarial Networks to transform portraits from one artistic genre to another. 
" } repositories: { - url: "https://github.com/Kal213/StarGAN-Tutorial-Tensorflow-2.3" - owner: "Kal213" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "Intuitive StarGAN Code written in Tensorflow 2.3" + url: "https://github.com/yunjey/StarGAN" + owner: "yunjey" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4883 + description: "StarGAN - Official PyTorch Implementation (CVPR 2018)" } repositories: { - url: "https://github.com/MACderRu/StarGan_pytorch" - owner: "MACderRu" + url: "https://github.com/yaxingwang/SDIT" + owner: "yaxingwang" framework: FRAMEWORK_PYTORCH - description: "My implementation of StarGan paper" + number_of_stars: 27 + description: "ACM-MM2019" } repositories: { - url: "https://github.com/Masao-Taketani/StarGAN-tf2" - owner: "Masao-Taketani" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/cosmic119/StarGAN" + owner: "cosmic119" + framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "TensorFlow 2 Implementation of \"StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation\"." } repositories: { - url: "https://github.com/hello-world-cc/starGANv1-Pytorch" - owner: "hello-world-cc" + url: "https://github.com/SummerHuiZhang/StarGAN_Norland" + owner: "SummerHuiZhang" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/shaominghe/stargan_adience" - owner: "shaominghe" + url: "https://github.com/SummerHuiZhang/StarGAN_test" + owner: "SummerHuiZhang" framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "seasons transfer with StarGAN" } repositories: { - url: "https://github.com/stevebong31/stargan" - owner: "stevebong31" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/taki0112/StarGAN-Tensorflow" + owner: "taki0112" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 708 + description: "Simple Tensorflow implementation of StarGAN (CVPR 2018 Oral)" } repositories: { - url: "https://github.com/nguyen-nhat-anh/Star-GAN" - owner: "nguyen-nhat-anh" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/dipjyoti92/StarGAN-Voice-Conversion-2" + owner: "dipjyoti92" + framework: FRAMEWORK_PYTORCH + number_of_stars: 13 + description: "A Pytorch implementation of StarGAN-VC2" } repositories: { - url: "https://github.com/shridhivyah/starGAN" - owner: "shridhivyah" - framework: FRAMEWORK_TENSORFLOW - description: "FaceAttributeChange_StarGAN" + url: "https://github.com/sitharakannan/inf" + owner: "sitharakannan" + framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/aditiasthana1004/StarGAN" - owner: "aditiasthana1004" - framework: FRAMEWORK_OTHERS - description: "StarGAN" + url: "https://github.com/eriklindernoren/PyTorch-GAN" + owner: "eriklindernoren" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9972 + description: "PyTorch implementations of Generative Adversarial Networks." 
} } video: { video_id: "i3-rTEFpyv0" video_title: "PR-152:StarGAN: Unified Generative Adversarial Networks for Multi-Domain Image-to-Image Translation" number_of_likes: 21 - number_of_views: 1606 + number_of_views: 1616 published_date: { seconds: 1554040628 } @@ -12928,7 +13035,7 @@ pr_id_to_video: { video_id: "zGrwpa5-_0Y" video_title: "PR-153: SNAIL: A Simple Neural Attentive Meta-Learner" number_of_likes: 11 - number_of_views: 935 + number_of_views: 950 published_date: { seconds: 1554043097 } @@ -12952,54 +13059,25 @@ pr_id_to_video: { authors: "Ming-Yu Liu" authors: "Ting-Chun Wang" authors: "Jun-Yan Zhu" - repositories: { - url: "https://github.com/KushajveerSingh/SPADE-PyTorch" - owner: "KushajveerSingh" - framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "PyTorch unofficial implementation of Semantic Image Synthesis with Spatially-Adaptive Normalization paper by Nvidia Research" - } - repositories: { - url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter06" - owner: "master" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 65 - description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" - } - repositories: { - url: "https://github.com/GrahamRigby/GauGanPlus" - owner: "GrahamRigby" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/AhmedAmraniAkdi/BudgetNvidiaGaugan" - owner: "AhmedAmraniAkdi" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/LoganOneal/neuralpaint-server" - owner: "LoganOneal" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/noyoshi/hacksc" owner: "noyoshi" framework: FRAMEWORK_PYTORCH - number_of_stars: 197 + number_of_stars: 196 description: "🖌 photorealistic drawings from simple sketches using NVIDIA's GauGAN " } repositories: { url: "https://github.com/noyoshi/smart-sketch" owner: "noyoshi" framework: FRAMEWORK_PYTORCH - number_of_stars: 197 + number_of_stars: 196 description: "🖌 photorealistic drawings from simple sketches using NVIDIA's GauGAN " } repositories: { url: "https://github.com/taki0112/SPADE-Tensorflow" owner: "taki0112" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 316 + number_of_stars: 317 description: "Simple Tensorflow implementation of \"Semantic Image Synthesis with Spatially-Adaptive Normalization\" a.k.a. 
GauGAN, SPADE (CVPR 2019 Oral)" } repositories: { @@ -13008,6 +13086,13 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH description: "NVidia netural network for sketches" } + repositories: { + url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter06" + owner: "master" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 75 + description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" + } repositories: { url: "https://github.com/manicman1999/StyleGAN-Keras" owner: "manicman1999" @@ -13015,12 +13100,40 @@ pr_id_to_video: { number_of_stars: 160 description: "StyleGAN made with Keras" } + repositories: { + url: "https://github.com/tinawu-23/smart-sketch" + owner: "tinawu-23" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "[USC HackSC] A web interface that converts simple sketches into photorealistic images using NVIDIA's GuaGAN: devpost.com/software/hacksc-jtsc04" + } + repositories: { + is_official: true + url: "https://github.com/NVlabs/SPADE" + owner: "NVlabs" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6649 + description: "Semantic Image Synthesis with SPADE" + } + repositories: { + url: "https://github.com/divyanshj16/SPADE" + owner: "divyanshj16" + framework: FRAMEWORK_PYTORCH + number_of_stars: 63 + description: "\"Semantic Image Synthesis with Spatially-Adaptive Normalization\" paper implementation" + } + repositories: { + url: "https://github.com/llDataSciencell/SmartSketchNvidiaSpadeForWindows" + owner: "llDataSciencell" + framework: FRAMEWORK_PYTORCH + description: "Windows source code for SmartSketch which is drawing tool for NVIDIA SPADE." + } } video: { video_id: "1nJf35TSYtE" video_title: "PR-154: Semantic Image Synthesis with Spatially-Adaptive Normalization" number_of_likes: 20 - number_of_views: 1497 + number_of_views: 1530 published_date: { seconds: 1554651283 } @@ -13044,18 +13157,6 @@ pr_id_to_video: { authors: "Alexander Kirillov" authors: "Ross Girshick" authors: "Kaiming He" - repositories: { - url: "https://github.com/JihaoLee/Randomly_Wired_reproducibility" - owner: "JihaoLee" - framework: FRAMEWORK_PYTORCH - description: "This is a reimplementation of Exploring Randomly Wired Neural Networks for Image Recognition" - } - repositories: { - url: "https://github.com/wolszhang/randWireNN" - owner: "wolszhang" - framework: FRAMEWORK_OTHERS - description: "compare different randomly wired neural network" - } repositories: { url: "https://github.com/swdsld/RandWire_tensorflow" owner: "swdsld" @@ -13067,7 +13168,7 @@ pr_id_to_video: { url: "https://github.com/leaderj1001/RandWireNN" owner: "leaderj1001" framework: FRAMEWORK_PYTORCH - number_of_stars: 78 + number_of_stars: 80 description: "Implementing Randomly Wired Neural Networks for Image Recognition, Using CIFAR-10 dataset, CIFAR-100 dataset" } repositories: { @@ -13081,7 +13182,7 @@ pr_id_to_video: { url: "https://github.com/seungwonpark/RandWireNN" owner: "seungwonpark" framework: FRAMEWORK_PYTORCH - number_of_stars: 678 + number_of_stars: 679 description: "Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" } repositories: { @@ -13102,16 +13203,28 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/pycls" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1648 + number_of_stars: 1674 description: "Codebase for Image Classification Research, written in PyTorch." 
} repositories: { url: "https://github.com/JiaminRen/RandWireNN" owner: "JiaminRen" framework: FRAMEWORK_PYTORCH - number_of_stars: 270 + number_of_stars: 272 description: "Pytorch Implementation of: \"Exploring Randomly Wired Neural Networks for Image Recognition\"" } + repositories: { + url: "https://github.com/wolszhang/randWireNN" + owner: "wolszhang" + framework: FRAMEWORK_OTHERS + description: "compare different randomly wired neural network" + } + repositories: { + url: "https://github.com/JihaoLee/Randomly_Wired_reproducibility" + owner: "JihaoLee" + framework: FRAMEWORK_PYTORCH + description: "This is a reimplementation of Exploring Randomly Wired Neural Networks for Image Recognition" + } methods: { name: "Weight Decay" full_name: "Weight Decay" @@ -13167,7 +13280,7 @@ pr_id_to_video: { video_id: "qnGm1h365tc" video_title: "PR-155: Exploring Randomly Wired Neural Networks for Image Recognition" number_of_likes: 91 - number_of_views: 4146 + number_of_views: 4168 published_date: { seconds: 1554649684 } @@ -13190,13 +13303,6 @@ pr_id_to_video: { authors: "Hongyang Gao" authors: "Zhengyang Wang" authors: "Shuiwang Ji" - repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" - framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" - } repositories: { url: "https://github.com/HongyangGao/ChannelNets" owner: "HongyangGao" @@ -13204,6 +13310,13 @@ pr_id_to_video: { number_of_stars: 76 description: "Tensorflow Implementation of ChannelNets (NeurIPS 18)" } + repositories: { + url: "https://github.com/osmr/imgclsmob" + owner: "osmr" + framework: FRAMEWORK_OTHERS + number_of_stars: 2268 + description: "Sandbox for training deep learning networks" + } methods: { name: "Dense Connections" full_name: "Dense Connections" @@ -13214,7 +13327,7 @@ pr_id_to_video: { video_id: "oZbKWOBfNhk" video_title: "PR-156: ChannelNets: Compact and Efficient CNN via Channel-Wise Convolutions" number_of_likes: 1 - number_of_views: 226 + number_of_views: 234 published_date: { seconds: 1565744830 } @@ -13241,7 +13354,7 @@ pr_id_to_video: { video: { video_id: "JbXdn44myP4" video_title: "PR-157: Best of both worlds: human-machine collaboration for object annotation" - number_of_views: 353 + number_of_views: 357 published_date: { seconds: 1556532811 } @@ -13267,24 +13380,11 @@ pr_id_to_video: { authors: "Dagui Chen" authors: "Yu Qiao" authors: "Junjie Yan" - repositories: { - url: "https://github.com/ArashJavan/FOTS" - owner: "ArashJavan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - } - repositories: { - url: "https://github.com/Kaushal28/FOTS-PyTorch" - owner: "Kaushal28" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "PyTorch Implementation of Fast Oriented Text Spotting (FOTS)" - } repositories: { url: "https://github.com/xieyufei1993/FOTS" owner: "xieyufei1993" framework: FRAMEWORK_PYTORCH - number_of_stars: 172 + number_of_stars: 171 description: "An Implementation of the FOTS: Fast Oriented Text Spotting with a Unified Network" } repositories: { @@ -13305,16 +13405,29 @@ pr_id_to_video: { url: "https://github.com/jiangxiluning/FOTS.PyTorch" owner: "jiangxiluning" framework: FRAMEWORK_PYTORCH - number_of_stars: 537 + number_of_stars: 544 description: "FOTS Pytorch Implementation" } repositories: { url: "https://github.com/Masao-Taketani/FOTS_OCR" owner: "Masao-Taketani" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 + number_of_stars: 43 description: "TensorFlow Implementation of FOTS, Fast 
Oriented Text Spotting with a Unified Network." } + repositories: { + url: "https://github.com/Kaushal28/FOTS-PyTorch" + owner: "Kaushal28" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "PyTorch Implementation of Fast Oriented Text Spotting (FOTS)" + } + repositories: { + url: "https://github.com/ArashJavan/FOTS" + owner: "ArashJavan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + } methods: { name: "Convolution" full_name: "Convolution" @@ -13325,7 +13438,7 @@ pr_id_to_video: { video_id: "hOFViMbYnrs" video_title: "PR-158: FOTS: Fast Oriented Text Spotting with a Unified Network" number_of_likes: 25 - number_of_views: 1005 + number_of_views: 1025 published_date: { seconds: 1556529052 } @@ -13355,14 +13468,15 @@ pr_id_to_video: { url: "https://github.com/cchen-cc/SIFA" owner: "cchen-cc" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 154 + number_of_stars: 160 + description: "[TMI'20, AAAI'19] Synergistic Image and Feature Adaptation" } } video: { video_id: "sR7hBJGpwQo" video_title: "PR-159: SIFA: Towards Cross- Modality Domain Adaptation for Medical Image Segmentation" number_of_likes: 10 - number_of_views: 480 + number_of_views: 485 published_date: { seconds: 1557132628 } @@ -13436,7 +13550,7 @@ pr_id_to_video: { video_id: "V9KusGzsx10" video_title: "PR-160: GLoMo Unsupervised Learning of Transferable Relational Graph" number_of_likes: 13 - number_of_views: 627 + number_of_views: 635 published_date: { seconds: 1557076158 } @@ -13463,73 +13577,70 @@ pr_id_to_video: { authors: "Quoc V. Le" authors: "Ruslan Salakhutdinov" repositories: { - url: "https://github.com/SambhawDrag/XLNet.jl" - owner: "SambhawDrag" - framework: FRAMEWORK_TENSORFLOW - description: "A Julia-based implementation of XLNet: A Generalized Autoregressive Pretraining for Language Understanding. < Flux | JuliaText >" - } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/transformer-xl" - owner: "language_model" - framework: FRAMEWORK_OTHERS - number_of_stars: 1489 - description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." - } - repositories: { - url: "https://github.com/huggingface/transformers" - owner: "huggingface" + url: "https://github.com/threelittlemonkeys/transformer-pytorch" + owner: "threelittlemonkeys" framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 - description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." 
+ number_of_stars: 9 + description: "The Transformer in PyTorch" } repositories: { - url: "https://github.com/facebookresearch/code-prediction-transformer" - owner: "facebookresearch" + url: "https://github.com/inzva/fake-academic-paper-generation" + owner: "inzva" framework: FRAMEWORK_PYTORCH - number_of_stars: 64 - description: "This repo will contain replication package for the paper \"Feeding Trees to Transformers for Code Completion\"" + number_of_stars: 32 + description: "inzva AI Projects #2 - Fake Academic Paper Generation Project" } repositories: { - url: "https://github.com/lab-ml/nn/tree/master/labml_nn/transformers/xl" - owner: "transformers" + url: "https://github.com/sh951011/Attention-Implementation" + owner: "sh951011" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." - } - repositories: { - url: "https://github.com/Jmkernes/PAR-Transformer-XL" - owner: "Jmkernes" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - description: "An implementation of the Pay Attention when Required transformer: https://arxiv.org/pdf/2009.04534.pdf" + number_of_stars: 62 + description: "PyTorch implementation of some attentions for Deep Learning Researchers. " } repositories: { - url: "https://github.com/sooftware/conformer" + url: "https://github.com/sooftware/Attention-Implementation" owner: "sooftware" framework: FRAMEWORK_PYTORCH - number_of_stars: 150 - description: "PyTorch implementation of \"Conformer: Convolution-augmented Transformer for Speech Recognition\" (INTERSPEECH 2020)" + number_of_stars: 62 + description: "PyTorch implementation of some attentions for Deep Learning Researchers. " } repositories: { url: "https://github.com/sooftware/nlp-attentions" owner: "sooftware" framework: FRAMEWORK_PYTORCH - number_of_stars: 54 + number_of_stars: 62 description: "PyTorch implementation of some attentions for Deep Learning Researchers. " } repositories: { - url: "https://github.com/sooftware/Attention-Implementation" - owner: "sooftware" + url: "https://github.com/TimDettmers/transformer-xl" + owner: "TimDettmers" framework: FRAMEWORK_PYTORCH number_of_stars: 54 - description: "PyTorch implementation of some attentions for Deep Learning Researchers. " } repositories: { - url: "https://github.com/sh951011/Attention-Implementation" - owner: "sh951011" + url: "https://github.com/huggingface/xlnet" + owner: "huggingface" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 25 + description: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" + } + repositories: { + url: "https://github.com/listenviolet/XLNet" + owner: "listenviolet" framework: FRAMEWORK_PYTORCH - number_of_stars: 54 - description: "PyTorch implementation of some attentions for Deep Learning Researchers. 
" + } + repositories: { + url: "https://github.com/benkrause/dynamiceval-transformer" + owner: "benkrause" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 44 + } + repositories: { + url: "https://github.com/cedrickchee/pytorch-pretrained-BERT" + owner: "cedrickchee" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "PyTorch version of Google AI's BERT model with script to load Google's pre-trained models" } methods: { name: "Variational Dropout" @@ -13585,8 +13696,8 @@ pr_id_to_video: { video: { video_id: "lSTljZy8ag4" video_title: "PR-161: Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" - number_of_likes: 36 - number_of_views: 2246 + number_of_likes: 37 + number_of_views: 2291 published_date: { seconds: 1557744220 } @@ -13615,7 +13726,7 @@ pr_id_to_video: { video_id: "AqStpR29lTA" video_title: "PR-162: DeepPermNet: Visual Permutation Learning" number_of_likes: 2 - number_of_views: 313 + number_of_views: 317 published_date: { seconds: 1557675917 } @@ -13642,8 +13753,8 @@ pr_id_to_video: { video: { video_id: "Dvi5_YC8Yts" video_title: "PR-163: CNN Attention Networks" - number_of_likes: 130 - number_of_views: 8460 + number_of_likes: 137 + number_of_views: 8739 published_date: { seconds: 1558274434 } @@ -13670,7 +13781,7 @@ pr_id_to_video: { url: "https://github.com/zacheberhart/Convolutional-Disentangled-Variational-Autoencoder" owner: "zacheberhart" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 6 description: "A Convolutional β-VAE in PyTorch based loosely off of the Conv VAE used in the World Models research paper." } repositories: { @@ -13684,14 +13795,14 @@ pr_id_to_video: { url: "https://github.com/Saswatm123/MMD-VAE" owner: "Saswatm123" framework: FRAMEWORK_PYTORCH - number_of_stars: 34 + number_of_stars: 35 description: "Pytorch implementation of Maximum Mean Discrepancy Variational Autoencoder, a member of the InfoVAE family that maximizes Mutual Information between the Isotropic Gaussian Prior (as the latent space) and the Data Distribution." } repositories: { url: "https://github.com/AntixK/PyTorch-VAE" owner: "AntixK" framework: FRAMEWORK_PYTORCH - number_of_stars: 2173 + number_of_stars: 2276 description: "A Collection of Variational Autoencoders (VAE) in PyTorch." 
} repositories: { @@ -13705,7 +13816,7 @@ pr_id_to_video: { video: { video_id: "29QcXLoYC60" video_title: "PR-164: InfoVAE: Balancing Learning and Inference in Variational Autoencoders" - number_of_views: 594 + number_of_views: 600 published_date: { seconds: 1558883112 } @@ -13729,24 +13840,6 @@ pr_id_to_video: { authors: "Aliaksandra Shysheya" authors: "Egor Burkov" authors: "Victor Lempitsky" - repositories: { - url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" - owner: "krisrjohnson" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/Ierezell/PapierFewShot" - owner: "Ierezell" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Reimplementation in pytorch of the paper https://arxiv.org/pdf/1905.08233.pdf" - } - repositories: { - url: "https://github.com/times2049/talkinghead" - owner: "times2049" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } repositories: { url: "https://github.com/ZVK/Talking-Heads" owner: "ZVK" @@ -13757,14 +13850,14 @@ pr_id_to_video: { url: "https://github.com/shoutOutYangJie/Few-Shot-Adversarial-Learning-for-face-swap" owner: "shoutOutYangJie" framework: FRAMEWORK_PYTORCH - number_of_stars: 123 + number_of_stars: 124 description: "This is a unofficial re-implementation of the paper \"Few-Shot Adversarial Learning of Realistic Neural Talking Head Models\"" } repositories: { url: "https://github.com/vincent-thevenin/Realistic-Neural-Talking-Head-Models" owner: "vincent-thevenin" framework: FRAMEWORK_PYTORCH - number_of_stars: 654 + number_of_stars: 661 description: "My implementation of Few-Shot Adversarial Learning of Realistic Neural Talking Head Models (Egor Zakharov et al.)." } repositories: { @@ -13777,15 +13870,33 @@ pr_id_to_video: { url: "https://github.com/grey-eye/talking-heads" owner: "grey-eye" framework: FRAMEWORK_PYTORCH - number_of_stars: 508 + number_of_stars: 511 description: "Our implementation of \"Few-Shot Adversarial Learning of Realistic Neural Talking Head Models\" (Egor Zakharov et al.)" } + repositories: { + url: "https://github.com/times2049/talkinghead" + owner: "times2049" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/Ierezell/PapierFewShot" + owner: "Ierezell" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Reimplementation in pytorch of the paper https://arxiv.org/pdf/1905.08233.pdf" + } + repositories: { + url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" + owner: "krisrjohnson" + framework: FRAMEWORK_PYTORCH + } } video: { video_id: "4pY_6VG4npc" video_title: "PR-165: Few-Shot Adversarial Learning of Realistic Neural Talking Head Models" number_of_likes: 41 - number_of_views: 2898 + number_of_views: 2914 published_date: { seconds: 1558879643 } @@ -13809,20 +13920,20 @@ pr_id_to_video: { authors: "Tsung-Yi Lin" authors: "Ruoming Pang" authors: "Quoc V. Le" - repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" - } repositories: { url: "https://github.com/tensorflow/tpu/tree/master/models/official/detection" owner: "official" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 + number_of_stars: 4415 description: "Reference models and tools for Cloud TPUs." 
} + repositories: { + url: "https://github.com/open-mmlab/mmdetection" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 16041 + description: "OpenMMLab Detection Toolbox and Benchmark" + } methods: { name: "Average Pooling" full_name: "Average Pooling" @@ -13877,8 +13988,8 @@ pr_id_to_video: { video: { video_id: "FAAt0jejWOA" video_title: "PR-166: NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection" - number_of_likes: 15 - number_of_views: 2133 + number_of_likes: 16 + number_of_views: 2162 published_date: { seconds: 1560917270 } @@ -13905,24 +14016,11 @@ pr_id_to_video: { authors: "James Wexler" authors: "Fernanda Viegas" authors: "Rory Sayres" - repositories: { - url: "https://github.com/jwendyr/tcav" - owner: "jwendyr" - framework: FRAMEWORK_TENSORFLOW - description: "tcav" - } - repositories: { - url: "https://github.com/giovannimaffei/concept_activation_vectors" - owner: "giovannimaffei" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Simple implementation of \"Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)\", Been Kim et al., 2017 " - } repositories: { url: "https://github.com/medgift/iMIMIC-RCVs" owner: "medgift" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 + number_of_stars: 15 description: "This repository contains the code for implementing Bidirectional Relevance scores for Digital Histopathology, which was used for the results in the iMIMIC workshop paper: Regression Concept Vectors for Bidirectional Explanations in Histopathology" } repositories: { @@ -13950,7 +14048,7 @@ pr_id_to_video: { url: "https://github.com/tensorflow/tcav" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 471 + number_of_stars: 477 description: "Code for the TCAV ML interpretability project" } repositories: { @@ -13959,12 +14057,25 @@ pr_id_to_video: { framework: FRAMEWORK_OTHERS description: " ⚙📲Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)" } + repositories: { + url: "https://github.com/giovannimaffei/concept_activation_vectors" + owner: "giovannimaffei" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Simple implementation of \"Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV)\", Been Kim et al., 2017 " + } + repositories: { + url: "https://github.com/jwendyr/tcav" + owner: "jwendyr" + framework: FRAMEWORK_TENSORFLOW + description: "tcav" + } } video: { video_id: "-y0oghbEHMM" video_title: "PR-167: Interpretability Beyond Feature Attribution: Testing with Concept Activation Vector (TCAV)" number_of_likes: 4 - number_of_views: 783 + number_of_views: 814 published_date: { seconds: 1559486974 } @@ -13991,25 +14102,6 @@ pr_id_to_video: { authors: "Timo Aila" authors: "Jaakko Lehtinen" authors: "Jan Kautz" - repositories: { - url: "https://github.com/samuelchassot/FUNIT" - owner: "samuelchassot" - framework: FRAMEWORK_PYTORCH - description: "Translate images to unseen domains in the test time with few example images." - } - repositories: { - url: "https://github.com/mkolodny/funit" - owner: "mkolodny" - framework: FRAMEWORK_PYTORCH - } - repositories: { - is_official: true - url: "https://github.com/NVlabs/FUNIT" - owner: "NVlabs" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1439 - description: "Translate images to unseen domains in the test time with few example images." 
- } repositories: { url: "https://github.com/taki0112/FUNIT-Tensorflow" owner: "taki0112" @@ -14033,7 +14125,7 @@ pr_id_to_video: { url: "https://github.com/shaoanlu/fewshot-face-translation-GAN" owner: "shaoanlu" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 723 + number_of_stars: 725 description: "Generative adversarial networks integrating modules from FUNIT and SPADE for face-swapping." } repositories: { @@ -14047,12 +14139,31 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH description: "Using NVlabs FUNIT to make a photobooth transforming your face into an animal. Displayed at FRINGE Festival 2020" } + repositories: { + is_official: true + url: "https://github.com/NVlabs/FUNIT" + owner: "NVlabs" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1443 + description: "Translate images to unseen domains in the test time with few example images." + } + repositories: { + url: "https://github.com/mkolodny/funit" + owner: "mkolodny" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/samuelchassot/FUNIT" + owner: "samuelchassot" + framework: FRAMEWORK_PYTORCH + description: "Translate images to unseen domains in the test time with few example images." + } } video: { video_id: "ANwAhuOeaiE" video_title: "PR-168: Few Shot Unsupervised Image to Image Translation" - number_of_likes: 11 - number_of_views: 905 + number_of_likes: 12 + number_of_views: 928 published_date: { seconds: 1560267339 } @@ -14075,15 +14186,18 @@ pr_id_to_video: { authors: "Mingxing Tan" authors: "Quoc V. Le" repositories: { - url: "https://github.com/houstonsantos/CassavaLeafDisease" - owner: "houstonsantos" - framework: FRAMEWORK_OTHERS - description: ":robot:" + url: "https://github.com/rajneeshaggarwal/google-efficientnet" + owner: "rajneeshaggarwal" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11 + description: "Replica of code from here: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet" } repositories: { - url: "https://github.com/mvenouziou/Project-Attention-Is-What-You-Get" - owner: "mvenouziou" + url: "https://github.com/titu1994/keras-efficientnets" + owner: "titu1994" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 186 + description: "Keras Implementation of EfficientNets" } repositories: { url: "https://github.com/jason90330/EdgeFinal" @@ -14092,52 +14206,53 @@ pr_id_to_video: { number_of_stars: 1 } repositories: { - url: "https://github.com/Tirth27/Skin-Cancer-Classification-using-Deep-Learning" - owner: "Tirth27" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/linhduongtuan/Fruits_Vegetables_Classifier_WebApp" + owner: "linhduongtuan" + framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "Classify Skin cancer from the skin lesion images using Image classification. The dataset for the project is obtained from the Kaggle SIIM-ISIC-Melanoma-Classification competition. 
" + description: "Git for 120 Fruits & Vegetables classification and Webapp Deployment for our paper: Automated fruit recognition using EfficientNet and MixNet; https://doi.org/10.1016/j.compag.2020.105326" } repositories: { - url: "https://github.com/reyvaz/pneumothorax_detection" - owner: "reyvaz" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/PotatoSpudowski/CactiNet" + owner: "PotatoSpudowski" + framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "Pneumothorax Disease Detection and Segmentation using X-Ray Images" + description: "Pytorch Implementation of a CNN similar to Google Brain's new EfficientNet from scratch to identify images of cactus🌵" } repositories: { - url: "https://github.com/reyvaz/steel-defect-segmentation" - owner: "reyvaz" + url: "https://github.com/DableUTeeF/keras-efficientnet" + owner: "DableUTeeF" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "EfficientNet-Based Unet++ Model for Steel Defect Segmentation" - } - repositories: { - url: "https://github.com/jaketae/mlp-mixer" - owner: "jaketae" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "PyTorch implementation of MLP-Mixer: An all-MLP Architecture for Vision" + number_of_stars: 17 + description: "keras-efficientnet: A Keras implementation of EfficientNet" } repositories: { - url: "https://github.com/lpirola13/flower-recognizer" - owner: "lpirola13" + url: "https://github.com/morganmcg1/stanford-cars" + owner: "morganmcg1" framework: FRAMEWORK_TENSORFLOW - description: "This project aims to create a deep learning model suitable in a mobile context that can recognize flowers from images." + number_of_stars: 44 + description: "Learning computer vision by striving to maximise accuracy on the Stanford Cars dataset" } repositories: { - url: "https://github.com/HyeonhoonLee/MAIC2021_Sleep" - owner: "HyeonhoonLee" + url: "https://github.com/miramind/efficientnets_pytorch" + owner: "miramind" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "4th place in MAIC2021 Sleep AI Challenge (SleepingDragon)" + number_of_stars: 2 + description: "Implementation of Efficientnets on PyTorch" } repositories: { - url: "https://github.com/lukemelas/EfficientNet-PyTorch" - owner: "lukemelas" + url: "https://github.com/mingxingtan/efficientnet" + owner: "mingxingtan" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6217 - description: "A PyTorch implementation of EfficientNet and EfficientNetV2 (coming soon!)" + number_of_stars: 81 + description: "EfficientNets snapshot" + } + repositories: { + url: "https://github.com/rohitgr7/tvmodels" + owner: "rohitgr7" + framework: FRAMEWORK_PYTORCH + number_of_stars: 11 + description: "Pytorch implementation of vision models." 
} methods: { name: "Average Pooling" @@ -14193,8 +14308,8 @@ pr_id_to_video: { video: { video_id: "Vhz0quyvR7I" video_title: "PR-169: EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" - number_of_likes: 104 - number_of_views: 6424 + number_of_likes: 109 + number_of_views: 6598 published_date: { seconds: 1560496231 } @@ -14219,72 +14334,72 @@ pr_id_to_video: { authors: "Shaoqing Ren" authors: "Jian Sun" repositories: { - url: "https://github.com/pytorch/vision" - owner: "pytorch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9433 - description: "Datasets, Transforms and Models specific to Computer Vision" + url: "https://github.com/Sakib1263/1DResNet-KERAS" + owner: "Sakib1263" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Models supported: ResNet18, ResNet34, ResNet50, ResNet101, ResNet 152 (1D and 2D versions with DEMO for Classification and Regression)." } repositories: { - url: "https://github.com/barmayo/spatial_attention" - owner: "barmayo" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Visual Navigation with Spatial Attention" + url: "https://github.com/Sakib1263/ResNet1D-Model-Builder-KERAS" + owner: "Sakib1263" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Models supported: ResNet18, ResNet34, ResNet50, ResNet101, ResNet 152 (1D and 2D versions with DEMO for Classification and Regression)." + } + repositories: { + url: "https://github.com/Sakib1263/1DResNet-Builder-KERAS" + owner: "Sakib1263" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Models supported: ResNet18, ResNet34, ResNet50, ResNet101, ResNet 152 (1D and 2D versions with DEMO for Classification and Regression)." + } + repositories: { + url: "https://github.com/RobotMobile/cv-deep-learning-paper-review" + owner: "RobotMobile" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 } repositories: { url: "https://github.com/tensorflow/models/tree/master/research/deeplab" owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/vinod377/STN-OCR" - owner: "vinod377" + url: "https://github.com/tensorflow/models/tree/master/research/slim" + owner: "research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Implementation of \"STN-OCR: A single Neural Network for Text Detection and Text Recognition\" in natural Scenes by Christian Bartz." + number_of_stars: 70934 + description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/facebookresearch/pycls" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1648 - description: "Codebase for Image Classification Research, written in PyTorch." + url: "https://github.com/kwotsin/TensorFlow-ENet" + owner: "kwotsin" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 255 + description: "TensorFlow implementation of ENet" } repositories: { - url: "https://github.com/MarkHershey/arxiv-dl" - owner: "MarkHershey" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Command-line arXiv.org Papers Downloader" + url: "https://github.com/nsom/VGG16" + owner: "nsom" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "My Pytorch Implementation of some basic models." 
} repositories: { url: "https://github.com/FrancescoSaverioZuppichini/ResNet" owner: "FrancescoSaverioZuppichini" framework: FRAMEWORK_PYTORCH - number_of_stars: 96 + number_of_stars: 98 description: "Clean, scalable and easy to use ResNet implementation in Pytorch" } repositories: { - url: "https://github.com/Masao-Taketani/FOTS_OCR" - owner: "Masao-Taketani" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 41 - description: "TensorFlow Implementation of FOTS, Fast Oriented Text Spotting with a Unified Network." - } - repositories: { - url: "https://github.com/amogh7joshi/plant-health-detection" - owner: "amogh7joshi" + url: "https://github.com/CanshangD/ResNet_tensorflow2.0" + owner: "CanshangD" framework: FRAMEWORK_TENSORFLOW - description: "Detecting plant health using neural networks." - } - repositories: { - url: "https://github.com/tiagoCuervo/JapaNet" - owner: "tiagoCuervo" - framework: FRAMEWORK_TENSORFLOW - description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" + description: "This is a ResNet implementation based on tensorflow2.0." } methods: { name: "Convolution" @@ -14341,7 +14456,7 @@ pr_id_to_video: { video_id: "7fSgqlC7Wdo" video_title: "PR-170: ResNet - Deep Residual Learning for Image Recognition" number_of_likes: 17 - number_of_views: 1776 + number_of_views: 1793 published_date: { seconds: 1565744287 } @@ -14370,14 +14485,14 @@ pr_id_to_video: { url: "https://github.com/wy1iu/LargeMargin_Softmax_Loss" owner: "wy1iu" framework: FRAMEWORK_PYTORCH - number_of_stars: 320 + number_of_stars: 321 description: "Implementation for in ICML'16." } repositories: { url: "https://github.com/amirhfarzaneh/lsoftmax-pytorch" owner: "amirhfarzaneh" framework: FRAMEWORK_PYTORCH - number_of_stars: 142 + number_of_stars: 144 description: "The Pytorch Implementation of L-Softmax" } methods: { @@ -14389,8 +14504,8 @@ pr_id_to_video: { video: { video_id: "7TugLIfExKM" video_title: "PR-171: Large margin softmax loss for Convolutional Neural Networks" - number_of_likes: 4 - number_of_views: 1064 + number_of_likes: 5 + number_of_views: 1081 published_date: { seconds: 1561534996 } @@ -14416,26 +14531,6 @@ pr_id_to_video: { authors: "Amir Sadeghian" authors: "Ian Reid" authors: "Silvio Savarese" - repositories: { - url: "https://github.com/AnselmC/bamot" - owner: "AnselmC" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Bundle Adjustment for Multiple Object Tracking" - } - repositories: { - url: "https://github.com/sremes/a2d2" - owner: "sremes" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - } - repositories: { - url: "https://github.com/JaryHuang/awesome_SSD_FPN_GIoU" - owner: "JaryHuang" - framework: FRAMEWORK_PYTORCH - number_of_stars: 99 - description: "This repository carries out some paper recurring work" - } repositories: { url: "https://github.com/OFRIN/Tensorflow_GIoU" owner: "OFRIN" @@ -14450,6 +14545,12 @@ pr_id_to_video: { number_of_stars: 40 description: "Caffe version Generalized & Distance & Complete Iou loss Implementation for Faster RCNN/FPN bbox regression" } + repositories: { + url: "https://github.com/sremes/a2d2" + owner: "sremes" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + } repositories: { url: "https://github.com/RuiminChen/GIouloss_CIouloss_caffe" owner: "RuiminChen" @@ -14471,12 +14572,33 @@ pr_id_to_video: { number_of_stars: 11 description: "Change smooth L1 loss to GIoU loss for RetinaNet" } + repositories: { + url: 
"https://github.com/JaryHuang/awesome_SSD_FPN_GIoU" + owner: "JaryHuang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 99 + description: "This repository carries out some paper recurring work" + } + repositories: { + url: "https://github.com/AnselmC/bamot" + owner: "AnselmC" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Bundle Adjustment for Multiple Object Tracking" + } + repositories: { + url: "https://github.com/gau-nernst/CenterNet" + owner: "gau-nernst" + framework: FRAMEWORK_PYTORCH + number_of_stars: 26 + description: "Implementation of CenterNet and FairMOT with PyTorch Lightning" + } } video: { video_id: "ENZBhDx0kqM" video_title: "PR-172: Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression" - number_of_likes: 30 - number_of_views: 1851 + number_of_likes: 31 + number_of_views: 1901 published_date: { seconds: 1561353991 } @@ -14506,25 +14628,6 @@ pr_id_to_video: { authors: "Timothy D. Hirzel" authors: "Ryan P. Adams" authors: "Alán Aspuru-Guzik" - repositories: { - url: "https://github.com/Ishan-Kumar2/Molecular_VAE_Pytorch" - owner: "Ishan-Kumar2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "PyTorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" - } - repositories: { - url: "https://github.com/TrentBrick/PAE" - owner: "TrentBrick" - framework: FRAMEWORK_PYTORCH - description: "Primary and Tertiary Sequence AutoEncoder" - } - repositories: { - url: "https://github.com/leungjch/drug_VAE" - owner: "leungjch" - framework: FRAMEWORK_OTHERS - description: "VAE trained on MOSES SMILES to produce novel molecules with druglike properties." - } repositories: { url: "https://github.com/shamelmerchant/keras-molecules" owner: "shamelmerchant" @@ -14541,7 +14644,7 @@ pr_id_to_video: { url: "https://github.com/aspuru-guzik-group/chemical_vae" owner: "aspuru-guzik-group" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 306 + number_of_stars: 309 description: "Code for 10.1021/acscentsci.7b00572, now running on Keras 2.0 and Tensorflow" } repositories: { @@ -14549,14 +14652,14 @@ pr_id_to_video: { url: "https://github.com/HIPS/molecule-autoencoder" owner: "HIPS" framework: FRAMEWORK_OTHERS - number_of_stars: 136 + number_of_stars: 139 description: "A project to enable optimization of molecules by transforming them to and from a continuous representation." } repositories: { url: "https://github.com/aksub99/molecular-vae" owner: "aksub99" framework: FRAMEWORK_PYTORCH - number_of_stars: 20 + number_of_stars: 21 description: "Pytorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" } repositories: { @@ -14565,6 +14668,25 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW number_of_stars: 2 } + repositories: { + url: "https://github.com/leungjch/drug_VAE" + owner: "leungjch" + framework: FRAMEWORK_OTHERS + description: "VAE trained on MOSES SMILES to produce novel molecules with druglike properties." 
+ } + repositories: { + url: "https://github.com/TrentBrick/PAE" + owner: "TrentBrick" + framework: FRAMEWORK_PYTORCH + description: "Primary and Tertiary Sequence AutoEncoder" + } + repositories: { + url: "https://github.com/Ishan-Kumar2/Molecular_VAE_Pytorch" + owner: "Ishan-Kumar2" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "PyTorch implementation of the paper \"Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules\"" + } } video: { video_id: "hk4e8ZCkNWg" @@ -14586,7 +14708,7 @@ pr_id_to_video: { video_id: "yqFDyX4ErSI" video_title: "PR-174: Restricted Boltzmann Machine and Deep Belief Networks" number_of_likes: 12 - number_of_views: 1007 + number_of_views: 1035 published_date: { seconds: 1561903626 } @@ -14613,24 +14735,37 @@ pr_id_to_video: { authors: "Ruslan Salakhutdinov" authors: "Quoc V. Le" repositories: { - url: "https://github.com/SambhawDrag/XLNet.jl" - owner: "SambhawDrag" + url: "https://github.com/graykode/xlnet-Pytorch" + owner: "graykode" + framework: FRAMEWORK_PYTORCH + number_of_stars: 515 + description: "Simple XLNet implementation with Pytorch Wrapper" + } + repositories: { + url: "https://github.com/tomgoter/nlp_finalproject" + owner: "tomgoter" framework: FRAMEWORK_TENSORFLOW - description: "A Julia-based implementation of XLNet: A Generalized Autoregressive Pretraining for Language Understanding. < Flux | JuliaText >" + number_of_stars: 2 + description: "Repository for Final Project for W266: Natural Language Processing with Deep Learning" } repositories: { - url: "https://github.com/huggingface/transformers" + url: "https://github.com/huggingface/xlnet" owner: "huggingface" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 25 + description: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" + } + repositories: { + url: "https://github.com/listenviolet/XLNet" + owner: "listenviolet" framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 - description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." } repositories: { - url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/xlnet" - owner: "language_model" - framework: FRAMEWORK_OTHERS - number_of_stars: 1489 - description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." 
+ url: "https://github.com/fanchenyou/transformer-study" + owner: "fanchenyou" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Transformer network variants tutorials" } repositories: { url: "https://github.com/https-seyhan/BugAI" @@ -14640,43 +14775,31 @@ pr_id_to_video: { description: "Deep Learning Models (Long Short Term Memory (LSTM), Recurrent Neural Networks (RNN), Convolutional Neural Networks (CNN) for AI based Bug prediction" } repositories: { - url: "https://github.com/utterworks/fast-bert" - owner: "utterworks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1608 - description: "Super easy library for BERT based NLP models" - } - repositories: { - url: "https://github.com/zaradana/Fast_BERT" - owner: "zaradana" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/studio-ousia/luke" - owner: "studio-ousia" - framework: FRAMEWORK_PYTORCH - number_of_stars: 320 - description: "LUKE -- Language Understanding with Knowledge-based Embeddings" + url: "https://github.com/2miatran/Natural-Language-Processing" + owner: "2miatran" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "BERT, ULMFiT" } repositories: { - url: "https://github.com/huggingface/xlnet" - owner: "huggingface" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 25 - description: "XLNet: Generalized Autoregressive Pretraining for Language Understanding" + url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/xlnet" + owner: "language_model" + framework: FRAMEWORK_OTHERS + number_of_stars: 1698 + description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." } repositories: { - url: "https://github.com/cuhksz-nlp/SAPar" - owner: "cuhksz-nlp" + url: "https://github.com/samwisegamjeee/pytorch-transformers" + owner: "samwisegamjeee" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + description: "👾 A library of state-of-the-art pretrained models for Natural Language Processing (NLP)" } repositories: { - url: "https://github.com/graykode/xlnet-Pytorch" - owner: "graykode" + url: "https://github.com/kaushaltrivedi/fast-bert" + owner: "kaushaltrivedi" framework: FRAMEWORK_PYTORCH - number_of_stars: 514 - description: "Simple XLNet implementation with Pytorch Wrapper" + number_of_stars: 1619 + description: "Super easy library for BERT based NLP models" } methods: { name: "Variational Dropout" @@ -14733,7 +14856,7 @@ pr_id_to_video: { video_id: "koj9BKiu1rU" video_title: "PR-175: XLNet: Generalized Autoregressive Pretraining for Language Understanding" number_of_likes: 57 - number_of_views: 2456 + number_of_views: 2515 published_date: { seconds: 1561964703 } @@ -14758,13 +14881,6 @@ pr_id_to_video: { authors: "Jeff Bilmes" authors: "Gopinath Chennupati" authors: "Jamal Mohd-Yusof" - repositories: { - url: "https://github.com/eabarnes1010/controlled_abstention_networks" - owner: "eabarnes1010" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Neural network loss functions for regression and classification tasks that can say \"I don't know\"." 
- } repositories: { is_official: true url: "https://github.com/thulas/dac-label-noise" @@ -14773,12 +14889,19 @@ pr_id_to_video: { number_of_stars: 45 description: "Label de-noising for deep learning" } + repositories: { + url: "https://github.com/eabarnes1010/controlled_abstention_networks" + owner: "eabarnes1010" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Neural network loss functions for regression and classification tasks that can say \"I don't know\"." + } } video: { video_id: "ihbEF6WGlrA" video_title: "PR-176: Combating Label Noise in Deep Learning using Abstention" number_of_likes: 18 - number_of_views: 904 + number_of_views: 910 published_date: { seconds: 1562509657 } @@ -14800,11 +14923,6 @@ pr_id_to_video: { } authors: "Yoseob Han" authors: "Jong Chul Ye" - repositories: { - url: "https://github.com/hjahan58/framing-u-net" - owner: "hjahan58" - framework: FRAMEWORK_OTHERS - } repositories: { url: "https://github.com/jongcye/FramingUNet" owner: "jongcye" @@ -14819,6 +14937,11 @@ pr_id_to_video: { number_of_stars: 15 description: "Deep Convolutional Framelets: A General Deep Learning Framework for Inverse Problems" } + repositories: { + url: "https://github.com/hjahan58/framing-u-net" + owner: "hjahan58" + framework: FRAMEWORK_OTHERS + } methods: { name: "Convolution" full_name: "Convolution" @@ -14848,7 +14971,7 @@ pr_id_to_video: { video: { video_id: "KSJcQlEKI0Q" video_title: "PR-177: Framing U-Net via Deep Convolutional Framelets" - number_of_views: 586 + number_of_views: 590 published_date: { seconds: 1562511247 } @@ -14871,46 +14994,54 @@ pr_id_to_video: { authors: "Thomas N. Kipf" authors: "Max Welling" repositories: { - url: "https://github.com/giuseppefutia/link-prediction-code" - owner: "giuseppefutia" + url: "https://github.com/HoganZhang/pygcn_python3" + owner: "HoganZhang" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 } repositories: { - url: "https://github.com/andrejmiscic/gcn-pytorch" - owner: "andrejmiscic" + is_official: true + url: "https://github.com/tkipf/pygcn" + owner: "tkipf" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Implementation of the Graph Convolutional Networks in Pytorch" + number_of_stars: 3698 + description: "Graph Convolutional Networks in PyTorch" } repositories: { - url: "https://github.com/switiz/gnn-gcn-gat" - owner: "switiz" + url: "https://github.com/bcsrn/gcn" + owner: "bcsrn" framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "example of gnns" } repositories: { - url: "https://github.com/hazdzz/GCN" - owner: "hazdzz" + url: "https://github.com/darnbi/pygcn" + owner: "darnbi" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "The PyTorch version of GCN implemented by the paper." 
} repositories: { - url: "https://github.com/LouisDumont/GCN---re-implementation" - owner: "LouisDumont" - framework: FRAMEWORK_PYTORCH - description: "A re-implementation of the Graph Neural Networks described in https://arxiv.org/abs/1609.02907" + is_official: true + url: "https://github.com/tkipf/gcn" + owner: "tkipf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5852 + description: "Implementation of Graph Convolutional Networks in TensorFlow" + } + repositories: { + is_official: true + url: "https://github.com/tkipf/keras-gcn" + owner: "tkipf" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 752 + description: "Keras implementation of Graph Convolutional Networks" } repositories: { - url: "https://github.com/thanhtrunghuynh93/pygcn" - owner: "thanhtrunghuynh93" + url: "https://github.com/KimMeen/GCN" + owner: "KimMeen" framework: FRAMEWORK_PYTORCH + description: "A PyTorch implementation of the (GCN) paper https://arxiv.org/abs/1609.02907" } repositories: { - url: "https://github.com/lipingcoding/pygcn" - owner: "lipingcoding" + url: "https://github.com/Anieca/GCN" + owner: "Anieca" framework: FRAMEWORK_PYTORCH } repositories: { @@ -14919,17 +15050,9 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/LeeWooJung/GCN_reproduce" - owner: "LeeWooJung" + url: "https://github.com/lipingcoding/pygcn" + owner: "lipingcoding" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Reproduce GCN in pytorch" - } - repositories: { - url: "https://github.com/dtriepke/Graph_Convolutional_Network" - owner: "dtriepke" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 } methods: { name: "Graph Convolutional Networks" @@ -14945,8 +15068,8 @@ pr_id_to_video: { video: { video_id: "uqBsvoOY8jM" video_title: "PR-178: Graph Convolutional Network" - number_of_likes: 98 - number_of_views: 6224 + number_of_likes: 102 + number_of_views: 6377 published_date: { seconds: 1563112484 } @@ -14974,7 +15097,7 @@ pr_id_to_video: { video_id: "CpRGaFPIZnw" video_title: "PR-179: M3D-GAN: Multi-Modal Multi-Domain Translation with Universal Attention" number_of_likes: 8 - number_of_views: 559 + number_of_views: 562 published_date: { seconds: 1563115146 } @@ -14997,74 +15120,78 @@ pr_id_to_video: { authors: "Jonathan Frankle" authors: "Michael Carbin" repositories: { - url: "https://github.com/phiandark/SiftingFeatures" - owner: "phiandark" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Code for the paper \"Sifting out the features by pruning: Are convolutional networks the winning lottery ticket of fully connected ones?\"" - } - repositories: { - url: "https://github.com/hdo0947/Lottery-Ticket-Hypothesis" - owner: "hdo0947" + url: "https://github.com/gcastex/PruNet" + owner: "gcastex" framework: FRAMEWORK_PYTORCH - description: "Project with Jack Weitze" + number_of_stars: 13 + description: "Pruning applied to Facial Recognition." } repositories: { - url: "https://github.com/JingtongSu/sanity-checking-pruning" - owner: "JingtongSu" + url: "https://github.com/reallygooday/60daysofudacity" + owner: "reallygooday" framework: FRAMEWORK_PYTORCH - number_of_stars: 21 - description: "Code for Sanity-Checking Pruning Methods: Random Tickets can Win the Jackpot" + number_of_stars: 7 + description: "Pledged to engage with the topics of SPAIC Program for at least 30 minutes per day for 60 days." 
} repositories: { - url: "https://github.com/ARMargolis/melanoma-pytorch" - owner: "ARMargolis" + url: "https://github.com/emerali/LottoRBM" + owner: "emerali" framework: FRAMEWORK_PYTORCH - description: "Development of a PyTorch model for Kaggle melanoma competition" + number_of_stars: 2 } repositories: { - url: "https://github.com/zhangtj1996/lottery-ticket-hypothesis-Mxnet" - owner: "zhangtj1996" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "A reimplementation of \"The Lottery Ticket Hypothesis\" (Frankle and Carbin) by Mxnet for FC network." + url: "https://github.com/matthew-mcateer/Keras_pruning" + owner: "matthew-mcateer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 13 + description: "A walkthrough of how to prune keras models, using both weight-pruning and unit/neuron-pruning." } repositories: { - url: "https://github.com/Taoudi/LotteryTicketHypothesis" - owner: "Taoudi" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Kevin Ammouri and Youssef Taoudi" + url: "https://github.com/facebookresearch/open_lth" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 440 + description: "A repository in preparation for open-sourcing lottery ticket hypothesis code." } repositories: { - url: "https://github.com/COMP6248-Reproducability-Challenge/REPRODUCIBILITY-REPORT-THE-LOTTERY-TICKET-HYPOTHESIS" - owner: "COMP6248-Reproducability-Challenge" + url: "https://github.com/ARMargolis/melanoma-pytorch" + owner: "ARMargolis" framework: FRAMEWORK_PYTORCH + description: "Development of a PyTorch model for Kaggle melanoma competition" } repositories: { - url: "https://github.com/Theys96/lottery-ticket-hypothesis" - owner: "Theys96" + url: "https://github.com/uber-research/deconstructing-lottery-tickets" + owner: "uber-research" framework: FRAMEWORK_TENSORFLOW - description: "Experimentation setup for the \"Lottery Ticket\" hypothesis for neural networks." + number_of_stars: 104 } repositories: { - url: "https://github.com/Happy-Virus-IkBeom/LTH_Tensorflow" - owner: "Happy-Virus-IkBeom" + url: "https://github.com/Mraksu/Lottery-Ticket" + owner: "Mraksu" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Experiments about Lottery Ticket Hypothesis" } repositories: { - url: "https://github.com/kenichdietrich/LotteryTicketHypothesis" - owner: "kenichdietrich" + url: "https://github.com/google-research/lottery-ticket-hypothesis" + owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Codes to perform LTH with Keras and Tensorflow" + number_of_stars: 625 + description: "A reimplementation of \"The Lottery Ticket Hypothesis\" (Frankle and Carbin) on MNIST." + } + repositories: { + url: "https://github.com/rahulvigneswaran/Lottery-Ticket-Hypothesis-in-Pytorch" + owner: "rahulvigneswaran" + framework: FRAMEWORK_PYTORCH + number_of_stars: 176 + description: "This repository contains a Pytorch implementation of the paper \"The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks\" by Jonathan Frankle and Michael Carbin that can be easily adapted to any model/dataset." 
} } video: { video_id: "dkNmYu610r8" video_title: "PR-180: The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" - number_of_likes: 11 - number_of_views: 763 + number_of_likes: 12 + number_of_views: 771 published_date: { seconds: 1564043119 } @@ -15086,26 +15213,12 @@ pr_id_to_video: { } authors: "Amirata Ghorbani" authors: "James Zou" - repositories: { - url: "https://github.com/Weixin-Liang/HERALD" - owner: "Weixin-Liang" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "HERALD: An Annotation Efficient Method to Train User EngagementPredictors in Dialogs (ACL 2021)" - } - repositories: { - url: "https://github.com/Weixin-Liang/dialog_evaluation_CMADE" - owner: "Weixin-Liang" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Beyond User Self-Reported Likert Scale Ratings: A Comparison Model for Automatic Dialog Evaluation (ACL 2020)" - } repositories: { is_official: true url: "https://github.com/amiratag/DataShapley" owner: "amiratag" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 135 + number_of_stars: 136 description: "Data Shapley: Equitable Valuation of Data for Machine Learning" } repositories: { @@ -15115,12 +15228,26 @@ pr_id_to_video: { number_of_stars: 2 description: "All my two month works for projects toward Interpretable Machine Learning for Cathay(國泰金控 數數發 資料科學研發科 Lab)" } + repositories: { + url: "https://github.com/Weixin-Liang/dialog_evaluation_CMADE" + owner: "Weixin-Liang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Beyond User Self-Reported Likert Scale Ratings: A Comparison Model for Automatic Dialog Evaluation (ACL 2020)" + } + repositories: { + url: "https://github.com/Weixin-Liang/HERALD" + owner: "Weixin-Liang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "HERALD: An Annotation Efficient Method to Train User EngagementPredictors in Dialogs (ACL 2021)" + } } video: { video_id: "YdCXbBDuVuE" video_title: "PR-181: Data Shapley: Equitable Valuation of Data for Machine Learning" number_of_likes: 7 - number_of_views: 536 + number_of_views: 544 published_date: { seconds: 1563717023 } @@ -15149,7 +15276,7 @@ pr_id_to_video: { video_id: "twhZ3j_VCa0" video_title: "PR-182: Deep Learning Ensemble Method" number_of_likes: 16 - number_of_views: 786 + number_of_views: 808 published_date: { seconds: 1564898851 } @@ -15172,39 +15299,62 @@ pr_id_to_video: { authors: "Mingxing Tan" authors: "Quoc V. Le" repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + is_official: true + url: "https://github.com/tensorflow/tpu" + owner: "tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4415 + description: "Reference models and tools for Cloud TPUs." } repositories: { - url: "https://github.com/rwightman/pytorch-image-models" + url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet" + owner: "mnasnet" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4415 + description: "Reference models and tools for Cloud TPUs." 
+ } + repositories: { + url: "https://github.com/zsef123/MixNet-PyTorch" + owner: "zsef123" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "A PyTorch implementation of MixNet: Mixed Depthwise Convolutional Kernels" + } + repositories: { + url: "https://github.com/JinLi711/Convolution_Variants" + owner: "JinLi711" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." + } + repositories: { + url: "https://github.com/JinLi711/Attention-Augmented-Convolution" + owner: "JinLi711" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." + } + repositories: { + url: "https://github.com/rwightman/gen-efficientnet-pytorch" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + number_of_stars: 1369 + description: "Pretrained EfficientNet, EfficientNet-Lite, MixNet, MobileNetV3 / V2, MNASNet A1 and B1, FBNet, Single-Path NAS" } repositories: { url: "https://github.com/osmr/imgclsmob" owner: "osmr" framework: FRAMEWORK_OTHERS - number_of_stars: 2233 + number_of_stars: 2268 description: "Sandbox for training deep learning networks" } repositories: { url: "https://github.com/rwightman/efficientnet-jax" owner: "rwightman" framework: FRAMEWORK_OTHERS - number_of_stars: 69 + number_of_stars: 72 description: "EfficientNet, MobileNetV3, MobileNetV2, MixNet, etc in JAX w/ Flax Linen and Objax" } - repositories: { - url: "https://github.com/chrisway613/MixConv" - owner: "chrisway613" - framework: FRAMEWORK_PYTORCH - description: "Mixed Depth-Wise Convolution" - } repositories: { url: "https://github.com/neeraj-j/MixNet" owner: "neeraj-j" @@ -15212,33 +15362,11 @@ pr_id_to_video: { description: "Pytorch implementation of MixNet" } repositories: { - is_official: true - url: "https://github.com/tensorflow/tpu" - owner: "tensorflow" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 - description: "Reference models and tools for Cloud TPUs." - } - repositories: { - url: "https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet" - owner: "mnasnet" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 - description: "Reference models and tools for Cloud TPUs." - } - repositories: { - url: "https://github.com/zsef123/MixNet-PyTorch" - owner: "zsef123" + url: "https://github.com/chrisway613/MixConv" + owner: "chrisway613" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "A PyTorch implementation of MixNet: Mixed Depthwise Convolutional Kernels" - } - repositories: { - url: "https://github.com/JinLi711/Convolution_Variants" - owner: "JinLi711" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - description: "Reimplementing SOTA convolution variants with Tensorflow 2.0." + number_of_stars: 1 + description: "Mixed Depth-Wise Convolution" } methods: { name: "Average Pooling" @@ -15260,26 +15388,11 @@ pr_id_to_video: { full_name: "MobileNetV1" description: "**MobileNet** is a type of convolutional neural network designed for mobile and embedded vision applications. They are based on a streamlined architecture that uses depthwise separable convolutions to build lightweight deep neural networks that can have low latency for mobile and embedded devices." 
} - methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" - } methods: { name: "MixConv" full_name: "Mixed Depthwise Convolution" description: "**MixConv**, or **Mixed Depthwise Convolution**, is a type of depthwise convolution that naturally mixes up multiple kernel sizes in a single convolution. It is based on the insight that depthwise convolution applies a single kernel size to all channels, which MixConv overcomes by combining the benefits of multiple kernel sizes. It does this by partitioning channels into groups and applying a different kernel size to each group." } - methods: { - name: "Grouped Convolution" - full_name: "Grouped Convolution" - description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." - } - methods: { - name: "MixNet" - full_name: "MixNet" - description: "**MixNet** is a type of convolutional neural network discovered via AutoML that utilises MixConvs instead of regular depthwise convolutions." - } methods: { name: "Residual Connection" full_name: "Residual Connection" @@ -15290,12 +15403,27 @@ pr_id_to_video: { full_name: "Sigmoid Activation" description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." } + methods: { + name: "Squeeze-and-Excitation Block" + full_name: "Squeeze-and-Excitation Block" + description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. 
The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + } } video: { video_id: "252YxqpHzsg" video_title: "PR-183: MixNet: Mixed Depthwise Convolutional Kernels" - number_of_likes: 29 - number_of_views: 1692 + number_of_likes: 30 + number_of_views: 1715 published_date: { seconds: 1564326548 } @@ -15321,26 +15449,26 @@ pr_id_to_video: { authors: "Benjamin Graham" authors: "Hervé Jégou" repositories: { - url: "https://github.com/huggingface/block_movement_pruning" - owner: "huggingface" + is_official: true + url: "https://github.com/facebookresearch/kill-the-bits" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 30 - description: "Block Sparse movement pruning" + number_of_stars: 616 + description: "Code for: \"And the bit goes down: Revisiting the quantization of neural networks\"" } repositories: { url: "https://github.com/uber-research/permute-quantize-finetune" owner: "uber-research" framework: FRAMEWORK_PYTORCH - number_of_stars: 99 + number_of_stars: 103 description: "Using ideas from product quantization for state-of-the-art neural network compression." 
} repositories: { - is_official: true - url: "https://github.com/facebookresearch/kill-the-bits" - owner: "facebookresearch" + url: "https://github.com/huggingface/block_movement_pruning" + owner: "huggingface" framework: FRAMEWORK_PYTORCH - number_of_stars: 612 - description: "Code for: \"And the bit goes down: Revisiting the quantization of neural networks\"" + number_of_stars: 34 + description: "Block Sparse movement pruning" } methods: { name: "Mask R-CNN" @@ -15367,7 +15495,7 @@ pr_id_to_video: { video_id: "FPLvzxH8geY" video_title: "PR-184: And the Bit Goes Down: Revisiting the Quantization of Neural Networks" number_of_likes: 8 - number_of_views: 533 + number_of_views: 536 published_date: { seconds: 1564926928 } @@ -15394,75 +15522,80 @@ pr_id_to_video: { authors: "Irene Kotsia" authors: "Stefanos Zafeiriou" repositories: { - url: "https://github.com/jason90330/EdgeFinal" - owner: "jason90330" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/vladimirwest/insightface_cinematic" - owner: "vladimirwest" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } - repositories: { - url: "https://github.com/iChenning/facedetection" - owner: "iChenning" + url: "https://github.com/elliottzheng/face-detection" + owner: "elliottzheng" framework: FRAMEWORK_PYTORCH - description: "face detection,based on retinaface" + number_of_stars: 34 + description: "Fast and reliable face detection with RetinaFace.PyTorch" } repositories: { - url: "https://github.com/serengil/retinaface" - owner: "serengil" + url: "https://github.com/peteryuX/retinaface-tf2" + owner: "peteryuX" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 76 - description: "RetinaFace: Deep Face Detection Library in TensorFlow for Python" + number_of_stars: 209 + description: "RetinaFace (Single-stage Dense Face Localisation in the Wild, 2019) implemented (ResNet50, MobileNetV2 trained on single GPU) in Tensorflow 2.0+. This is an unofficial implementation. With Colab." } repositories: { - url: "https://github.com/serengil/deepface" - owner: "serengil" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1853 - description: "DeepFace: A Lightweight Deep Face Recognition and Facial Attribute Analysis (Age, Gender, Emotion and Race) Framework for Python" + url: "https://github.com/biubug6/Pytorch_Retinaface" + owner: "biubug6" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1567 + description: "Retinaface get 80.99% in widerface hard val using mobilenet0.25." 
} repositories: { - url: "https://github.com/nickuntitled/censorface-js" - owner: "nickuntitled" + url: "https://github.com/jason90330/EdgeFinal" + owner: "jason90330" framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "Face Detection in Javascript by ONNX.js" } repositories: { - url: "https://github.com/prajinkhadka/face_det_check" - owner: "prajinkhadka" + url: "https://github.com/neelabh17/MAVI-Face" + owner: "neelabh17" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 5 + description: "Implementation for MAVI Face detection using RetinaFace pretrained on Widerface dataset" } repositories: { - url: "https://github.com/Johnny952/retinaface_mod" - owner: "Johnny952" + url: "https://github.com/StanislasBertrand/retinaface4j" + owner: "StanislasBertrand" framework: FRAMEWORK_PYTORCH number_of_stars: 1 - } - repositories: { - url: "https://github.com/SohamSarfare/ADS" - owner: "SohamSarfare" - framework: FRAMEWORK_OTHERS - description: "Re-evaluating the results of the paper RetinaFace algorithm using original data along with the original WIDERFACE dataset. " + description: "RetinaFace: Single-stage Dense Face Localisation in the Wild, published in 2019. Reimplemented in java, with pretrained weights." } repositories: { url: "https://github.com/bubbliiiing/retinaface-keras" owner: "bubbliiiing" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 43 + number_of_stars: 44 description: "这是一个retinaface-keras的源码,可以用于训练自己的模型。" } + repositories: { + url: "https://github.com/Qengineering/Face-detection-Raspberry-Pi-32-64-bits" + owner: "Qengineering" + framework: FRAMEWORK_OTHERS + number_of_stars: 29 + description: "Super fast face detection on Raspberry Pi 4" + } + repositories: { + url: "https://github.com/1996scarlet/Laser-Eye" + owner: "1996scarlet" + framework: FRAMEWORK_OTHERS + number_of_stars: 60 + description: "Gaze Estimation via Deep Neural Networks" + } + repositories: { + url: "https://github.com/niuxiaozhang/mxnet-retinaface-to-gluon" + owner: "niuxiaozhang" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "mxnet 版本retianface 转gluon版本" + } } video: { video_id: "DkcHEnxkXpM" video_title: "PR-185: RetinaFace: Single-stage Dense Face Localisation in the Wild" number_of_likes: 25 - number_of_views: 2423 + number_of_views: 2451 published_date: { seconds: 1570081394 } @@ -15485,67 +15618,72 @@ pr_id_to_video: { authors: "Xun Huang" authors: "Serge Belongie" repositories: { - url: "https://github.com/J3698/AdaIN-reimplementation" - owner: "J3698" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Reimplementing AdaIN for OAK-1" + url: "https://github.com/Yijunmaverick/UniversalStyleTransfer" + owner: "Yijunmaverick" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 519 + description: "The source code of NIPS17 'Universal Style Transfer via Feature Transforms'." } repositories: { - url: "https://github.com/KaiyangZhou/ssdg-benchmark" - owner: "KaiyangZhou" - framework: FRAMEWORK_PYTORCH - number_of_stars: 26 - description: "Benchmarks for semi-supervised domain generalization." 
+ url: "https://github.com/AlbanSeurat/keras-style-transfer" + owner: "AlbanSeurat" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Neural Style transfer experiment with AdaIn operator : https://arxiv.org/abs/1703.06868" } repositories: { - url: "https://github.com/KaiyangZhou/mixstyle-release" - owner: "KaiyangZhou" + url: "https://github.com/ZVK/Talking-Heads" + owner: "ZVK" framework: FRAMEWORK_PYTORCH - number_of_stars: 94 - description: "Domain Generalization with MixStyle. ICLR'21." + number_of_stars: 5 } repositories: { - url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter05" - owner: "master" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 65 - description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" + is_official: true + url: "https://github.com/xunhuang1995/AdaIN-style" + owner: "xunhuang1995" + framework: FRAMEWORK_OTHERS + number_of_stars: 1121 + description: "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" } repositories: { - url: "https://github.com/srihari-humbarwadi/adain-tensorflow2.x" - owner: "srihari-humbarwadi" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "TensorFlow2.x implementation of Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" + url: "https://github.com/cynicaldevil/neural-style-transfer" + owner: "cynicaldevil" + framework: FRAMEWORK_PYTORCH + description: "Arbitrary neural style transfer implementation in PyTorch. Based on https://arxiv.org/abs/1703.06868" } repositories: { - url: "https://github.com/krisrjohnson/Realistic-Neural-Talking-Head-Models" - owner: "krisrjohnson" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/eridgd/WCT-TF" + owner: "eridgd" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 304 + description: "TensorFlow/Keras implementation of \"Universal Style Transfer via Feature Transforms\" from https://arxiv.org/abs/1705.08086" } repositories: { - url: "https://github.com/JeongsolKim/BiS400_term_project" - owner: "JeongsolKim" + url: "https://github.com/PacktPublishing/Hands-On-Image-Generation-with-TensorFlow-2.0/tree/master/Chapter05" + owner: "master" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 75 + description: "Hands-On Image Generation with TensorFlow 2.0, published by Packt" } repositories: { - url: "https://github.com/Jwrede/neural_style_transfer" - owner: "Jwrede" - framework: FRAMEWORK_PYTORCH - description: "Pytorch implementation of the paper Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" + url: "https://github.com/nhatsmrt/torch-styletransfer" + owner: "nhatsmrt" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Implementing style transfer using pytorch" } repositories: { - url: "https://github.com/times2049/talkinghead" - owner: "times2049" + url: "https://github.com/vincent-thevenin/Realistic-Neural-Talking-Head-Models" + owner: "vincent-thevenin" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 661 + description: "My implementation of Few-Shot Adversarial Learning of Realistic Neural Talking Head Models (Egor Zakharov et al.)." 
} repositories: { - url: "https://github.com/ptran1203/style_transfer" - owner: "ptran1203" - framework: FRAMEWORK_TENSORFLOW - description: "Arbitrary Style Transfer With Adaptive Instance Normalization" + url: "https://github.com/ZVK/talking_heads" + owner: "ZVK" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 } methods: { name: "Style Transfer Module" @@ -15601,8 +15739,8 @@ pr_id_to_video: { video: { video_id: "16BGnsIyh6M" video_title: "PR-186: Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" - number_of_likes: 23 - number_of_views: 827 + number_of_likes: 24 + number_of_views: 842 published_date: { seconds: 1565608448 } @@ -15629,33 +15767,33 @@ pr_id_to_video: { authors: "Hao Wu" authors: "Tien-Ju Yang" authors: "Edward Choi" - repositories: { - url: "https://github.com/google-research/morph-net" - owner: "google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 961 - description: "Fast & Simple Resource-Constrained Learning of Deep Network Structure" - } repositories: { url: "https://github.com/tensorflow/models" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70579 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { url: "https://github.com/NatGr/Master_Thesis" owner: "NatGr" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 + number_of_stars: 11 description: "Repo for my Master Thesis at ULiège in 2019 (Machine learning under resource constraints)" } + repositories: { + url: "https://github.com/google-research/morph-net" + owner: "google-research" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 964 + description: "Fast & Simple Resource-Constrained Learning of Deep Network Structure" + } } video: { video_id: "vUNAJsO2G98" video_title: "PR-187 : MorphNet: Fast & Simple Resource-Constrained Structure Learning of Deep Networks" number_of_likes: 4 - number_of_views: 517 + number_of_views: 521 published_date: { seconds: 1565712056 } @@ -15704,51 +15842,17 @@ pr_id_to_video: { authors: "Eduard Hovy" authors: "Minh-Thang Luong" authors: "Quoc V. Le" - repositories: { - url: "https://github.com/kekmodel/UDA-pytorch" - owner: "kekmodel" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "An unofficial PyTorch implementation of Unsupervised Data Augmentation" - } - repositories: { - url: "https://github.com/A-Telfer/AugKey" - owner: "A-Telfer" - framework: FRAMEWORK_OTHERS - description: "RandAugment with Keypoints Annotation Support." - } - repositories: { - url: "https://github.com/rwbfd/OpenCompetitionV2" - owner: "rwbfd" - framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "This is a collection of convenient methods for data science competition." 
- } repositories: { url: "https://github.com/joannayu25/NLP_Project_MIDS-W266" owner: "joannayu25" framework: FRAMEWORK_TENSORFLOW description: "Final Project for NLP class in UC Berkeley MIDS Program W266" } - repositories: { - url: "https://github.com/leblancdaniel/paraphraser" - owner: "leblancdaniel" - framework: FRAMEWORK_TENSORFLOW - description: "paraphrasing w/ unsupervised data augmentation (source: https://github.com/google-research/uda)" - } - repositories: { - is_official: true - url: "https://github.com/google-research/uda" - owner: "google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1740 - description: "Unsupervised Data Augmentation (UDA)" - } repositories: { url: "https://github.com/SanghunYun/UDA_pytorch" owner: "SanghunYun" framework: FRAMEWORK_PYTORCH - number_of_stars: 170 + number_of_stars: 179 description: "UDA(Unsupervised Data Augmentation) implemented by pytorch" } repositories: { @@ -15771,30 +15875,68 @@ pr_id_to_video: { number_of_stars: 2 description: "Repository for Final Project for W266: Natural Language Processing with Deep Learning" } + repositories: { + url: "https://github.com/peisuke/UnsupervisedDataAugmentation.pytorch" + owner: "peisuke" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + } + repositories: { + url: "https://github.com/sud0301/my_uda_pytorch" + owner: "sud0301" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + } + repositories: { + url: "https://github.com/ildoonet/unsupervised-data-augmentation" + owner: "ildoonet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 132 + description: "Unofficial PyTorch Implementation of Unsupervised Data Augmentation." + } + repositories: { + url: "https://github.com/uizard-technologies/realmix" + owner: "uizard-technologies" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 59 + description: "Code for the paper: RealMix: Towards Realistic Semi-Supervised Deep Learning Algorithms " + } + repositories: { + url: "https://github.com/PaulEmmanuelSotir/DeepCV" + owner: "PaulEmmanuelSotir" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Vision framework which brings a more robust, Deep Learning-based approach to some usual OpenCV use cases" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "WordPiece" + full_name: "WordPiece" + description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" + } methods: { name: "Bottleneck Residual Block" full_name: "Bottleneck Residual Block" description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." } methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. 
Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. 
More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + name: "Linear Warmup With Linear Decay" + full_name: "Linear Warmup With Linear Decay" + description: "**Linear Warmup With Linear Decay** is a learning rate schedule in which we increase the learning rate linearly for $n$ updates and then linearly decay afterwards." } methods: { name: "BERT" @@ -15802,31 +15944,26 @@ pr_id_to_video: { description: "**BERT**, or Bidirectional Encoder Representations from Transformers, improves upon standard [Transformers](http://paperswithcode.com/method/transformer) by removing the unidirectionality constraint by using a *masked language model* (MLM) pre-training objective. The masked language model randomly masks some of the tokens from the input, and the objective is to predict the original vocabulary id of the masked word based only on its context. Unlike left-to-right language model pre-training, the MLM objective enables the representation to fuse the left and the right context, which allows us to pre-train a deep bidirectional Transformer. In addition to the masked language model, BERT uses a *next sentence prediction* task that jointly pre-trains text-pair representations. \r\n\r\nThere are two steps in BERT: *pre-training* and *fine-tuning*. During pre-training, the model is trained on unlabeled data over different pre-training tasks. 
For fine-tuning, the BERT model is first initialized with the pre-trained parameters, and all of the parameters are fine-tuned using labeled data from the downstream tasks. Each downstream task has separate fine-tuned models, even though they\r\nare initialized with the same pre-trained parameters." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." } methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. 
Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" } methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." 
} } video: { video_id: "YiKn93Ud4dA" video_title: "PR-189: Unsupervised Data Augmentation for Consistency Training" number_of_likes: 18 - number_of_views: 1202 + number_of_views: 1232 published_date: { seconds: 1566745737 } @@ -15848,24 +15985,12 @@ pr_id_to_video: { } authors: "Dan Hendrycks" authors: "Kevin Gimpel" - repositories: { - url: "https://github.com/JakobCode/UncertaintyInNeuralNetworks_Resources" - owner: "JakobCode" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/sooonwoo/RotNet-OOD" - owner: "sooonwoo" - framework: FRAMEWORK_PYTORCH - description: "Self-Supervised Learning for OOD Detection (NeurIPS 2019)" - } repositories: { is_official: true url: "https://github.com/hendrycks/error-detection" owner: "hendrycks" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 135 + number_of_stars: 138 description: "A Baseline for Detecting Misclassified and Out-of-Distribution Examples in Neural Networks" } repositories: { @@ -15892,9 +16017,26 @@ pr_id_to_video: { url: "https://github.com/guyAmit/GLOD" owner: "guyAmit" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 11 description: "Github for the conference paper GLOD-Gaussian Likelihood OOD detector" } + repositories: { + url: "https://github.com/sooonwoo/RotNet-OOD" + owner: "sooonwoo" + framework: FRAMEWORK_PYTORCH + description: "Self-Supervised Learning for OOD Detection (NeurIPS 2019)" + } + repositories: { + url: "https://github.com/drumpt/RotNet-OOD" + owner: "drumpt" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/JakobCode/UncertaintyInNeuralNetworks_Resources" + owner: "JakobCode" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + } methods: { name: "Global Average Pooling" full_name: "Global Average Pooling" @@ -15949,8 +16091,8 @@ pr_id_to_video: { video: { video_id: "xaABseUoHAI" video_title: "PR-190: A Baseline For Detecting Misclassified and Out-of-Distribution Examples In Neural Networks" - number_of_likes: 10 - number_of_views: 1072 + number_of_likes: 11 + number_of_views: 1099 published_date: { seconds: 1569764236 } @@ -15974,32 +16116,39 @@ pr_id_to_video: { authors: "Elliot Creager" authors: "Toniann Pitassi" authors: "Richard Zemel" - repositories: { - url: "https://github.com/rvr-account/rvr" - owner: "rvr-account" - framework: FRAMEWORK_OTHERS - description: "Representation via Representations is a project aimed at improving transfer learning to out-of-distribution examples. Motivated by the challenge of finding robust biomedical predictors of disease, the model leverages data from heterogenous sources to discover feature representations that allow for accurate prediction outside of the training data." 
- } repositories: { is_official: true url: "https://github.com/VectorInstitute/laftr" owner: "VectorInstitute" framework: FRAMEWORK_OTHERS - number_of_stars: 33 + number_of_stars: 34 description: "Learning Adversarially Fair and Transferable Representations" } repositories: { url: "https://github.com/ecreager/laftr" owner: "ecreager" framework: FRAMEWORK_OTHERS - number_of_stars: 33 + number_of_stars: 34 description: "Learning Adversarially Fair and Transferable Representations" } + repositories: { + url: "https://github.com/murilo-goncalves/ML-Fairness" + owner: "murilo-goncalves" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Academic studies in Machine Learning and Fairness" + } + repositories: { + url: "https://github.com/rvr-account/rvr" + owner: "rvr-account" + framework: FRAMEWORK_OTHERS + description: "Representation via Representations is a project aimed at improving transfer learning to out-of-distribution examples. Motivated by the challenge of finding robust biomedical predictors of disease, the model leverages data from heterogenous sources to discover feature representations that allow for accurate prediction outside of the training data." + } } video: { video_id: "cgolskL-_WM" video_title: "PR-191: Learning Adversarially Fair and Transferable Representations" - number_of_views: 381 + number_of_views: 383 published_date: { seconds: 1567348971 } @@ -16023,25 +16172,6 @@ pr_id_to_video: { authors: "Ming-Yu Liu" authors: "Xiaodong Yang" authors: "Jan Kautz" - repositories: { - url: "https://github.com/ubc-vision/DwNet" - owner: "ubc-vision" - framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - } - repositories: { - url: "https://github.com/DLHacks/mocogan" - owner: "DLHacks" - framework: FRAMEWORK_PYTORCH - number_of_stars: 94 - description: "A pytorch implemention of MoCoGAN" - } - repositories: { - url: "https://github.com/vaibhavsingh9/MoCoGAN_implementation" - owner: "vaibhavsingh9" - framework: FRAMEWORK_PYTORCH - description: "Learning motion GAN's for video based generations" - } repositories: { url: "https://github.com/HappyBahman/ldvdGAN" owner: "HappyBahman" @@ -16054,21 +16184,40 @@ pr_id_to_video: { url: "https://github.com/sergeytulyakov/mocogan" owner: "sergeytulyakov" framework: FRAMEWORK_PYTORCH - number_of_stars: 437 + number_of_stars: 445 description: "MoCoGAN: Decomposing Motion and Content for Video Generation" } repositories: { url: "https://github.com/UBC-Computer-Vision-Group/DwNet" owner: "UBC-Computer-Vision-Group" framework: FRAMEWORK_PYTORCH - number_of_stars: 19 + number_of_stars: 20 + } + repositories: { + url: "https://github.com/vaibhavsingh9/MoCoGAN_implementation" + owner: "vaibhavsingh9" + framework: FRAMEWORK_PYTORCH + description: "Learning motion GAN's for video based generations" + } + repositories: { + url: "https://github.com/DLHacks/mocogan" + owner: "DLHacks" + framework: FRAMEWORK_PYTORCH + number_of_stars: 95 + description: "A pytorch implemention of MoCoGAN" + } + repositories: { + url: "https://github.com/ubc-vision/DwNet" + owner: "ubc-vision" + framework: FRAMEWORK_PYTORCH + number_of_stars: 20 } } video: { video_id: "9uNFtnRa_JU" video_title: "PR-192: MoCoGAN: Decomposing Motion and Content for Video Generation" - number_of_likes: 9 - number_of_views: 1238 + number_of_likes: 10 + number_of_views: 1274 published_date: { seconds: 1568189938 } @@ -16102,7 +16251,7 @@ pr_id_to_video: { video_id: "3KoqN_yYhmI" video_title: "PR-193: NISP: Pruning Networks using Neural Importance Score Propagation" number_of_likes: 
10 - number_of_views: 574 + number_of_views: 576 published_date: { seconds: 1567953078 } @@ -16128,17 +16277,18 @@ pr_id_to_video: { authors: "Zhekai Zhang" authors: "Song Han" repositories: { - url: "https://github.com/UoS-EEC/DynamicOFA" - owner: "UoS-EEC" + url: "https://github.com/MaximIntegratedAI/ai8x-synthesis" + owner: "MaximIntegratedAI" framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "[CVPRW 2021] Dynamic-OFA: Runtime DNN Architecture Switching for Performance Scaling on Heterogeneous Embedded Platforms" + number_of_stars: 12 + description: "Quantization and Synthesis (Device Specific Code Generation) for Maxim AI Devices" } repositories: { - url: "https://github.com/twice154/ofa-for-super-resolution" - owner: "twice154" + url: "https://github.com/MaximIntegratedAI/ai8x-training" + owner: "MaximIntegratedAI" framework: FRAMEWORK_PYTORCH - description: "Image downscaling & super-resolution project based on \"Once for All: Train One Network and Specialize it for Efficient Deployment\" (ICLR 2020)" + number_of_stars: 13 + description: "Model Training for Maxim AI Devices" } repositories: { url: "https://github.com/rotx-maxim/ai8x-synthesis" @@ -16147,46 +16297,46 @@ pr_id_to_video: { description: "Quantization and Synthesis (Device Specific Code Generation) for Maxim AI Devices" } repositories: { - url: "https://github.com/MaximIntegratedAI/ai8x-training" - owner: "MaximIntegratedAI" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Model Training for Maxim AI Devices" - } - repositories: { - url: "https://github.com/MaximIntegratedAI/ai8x-synthesis" - owner: "MaximIntegratedAI" + url: "https://github.com/MIT-HAN-LAB/ProxylessNAS" + owner: "MIT-HAN-LAB" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Quantization and Synthesis (Device Specific Code Generation) for Maxim AI Devices" + number_of_stars: 1255 + description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" } repositories: { - url: "https://github.com/seulkiyeom/once-for-all" - owner: "seulkiyeom" + url: "https://github.com/mit-han-lab/ProxylessNAS" + owner: "mit-han-lab" framework: FRAMEWORK_PYTORCH - description: "Transformable NAS (based on OFA network)" + number_of_stars: 1255 + description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" } repositories: { is_official: true url: "https://github.com/mit-han-lab/once-for-all" owner: "mit-han-lab" framework: FRAMEWORK_PYTORCH - number_of_stars: 1259 + number_of_stars: 1294 description: "[ICLR 2020] Once for All: Train One Network and Specialize it for Efficient Deployment" } repositories: { - url: "https://github.com/MIT-HAN-LAB/ProxylessNAS" - owner: "MIT-HAN-LAB" + url: "https://github.com/seulkiyeom/once-for-all" + owner: "seulkiyeom" framework: FRAMEWORK_PYTORCH - number_of_stars: 1245 - description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" + description: "Transformable NAS (based on OFA network)" } repositories: { - url: "https://github.com/mit-han-lab/ProxylessNAS" - owner: "mit-han-lab" + url: "https://github.com/UoS-EEC/DynamicOFA" + owner: "UoS-EEC" framework: FRAMEWORK_PYTORCH - number_of_stars: 1245 - description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" + number_of_stars: 11 + description: "[CVPRW 2021] Dynamic-OFA: Runtime DNN Architecture Switching for Performance Scaling on Heterogeneous Embedded Platforms" + } + 
repositories: { + url: "https://github.com/twice154/ofa-for-super-resolution" + owner: "twice154" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Image downscaling & super-resolution project based on \"Once for All: Train One Network and Specialize it for Efficient Deployment\" (ICLR 2020)" } } } @@ -16210,74 +16360,79 @@ pr_id_to_video: { authors: "Avital Oliver" authors: "Colin Raffel" repositories: { - url: "https://github.com/Jeffkang-94/pytorch-MixMatch" + url: "https://github.com/Jeffkang-94/Mixmatch-pytorch-SSL" owner: "Jeffkang-94" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 9 description: "An unofficial PyTorch implementation of MixMatch - A Holistic Approach to Semi-Supervised Learning" } repositories: { - url: "https://github.com/google-research/crest" - owner: "google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Repo for CReST: A Class-Rebalancing Self-Training Framework for Imbalanced Semi-Supervised Learning" - } - repositories: { - url: "https://github.com/narendoraiswamy/MixMatch-pytorch-demo" - owner: "narendoraiswamy" + url: "https://github.com/smkim7-kr/albu-MixMatch-pytorch" + owner: "smkim7-kr" framework: FRAMEWORK_PYTORCH - description: "The execution of tests for mixmatch." + number_of_stars: 2 + description: "Unofficial implementation of \"MixMatch: A Holistic Approach to Semi-Supervised Learning\"" } repositories: { - url: "https://github.com/DonghwanKIM0101/CS492I_CV" - owner: "DonghwanKIM0101" + url: "https://github.com/rit-git/Snippext_public" + owner: "rit-git" framework: FRAMEWORK_PYTORCH + number_of_stars: 47 + description: "Snippext: Semi-supervised Opinion Mining with Augmented Data" } repositories: { - url: "https://github.com/ktran1/Manifold-attack" - owner: "ktran1" - framework: FRAMEWORK_PYTORCH - description: "This is an implementation of manifold attack" + url: "https://github.com/ntozer/mixmatch-tensorflow2.0" + owner: "ntozer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 12 + description: "Implementation of \"MixMatch: A Holistic Approach to Semi-Supervised Learning\" in TensorFlow 2.0" } repositories: { - url: "https://github.com/dhx000/DGM_project" - owner: "dhx000" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 + url: "https://github.com/FelixAbrahamsson/mixmatch-pytorch" + owner: "FelixAbrahamsson" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "An implementation of MixMatch with PyTorch" } repositories: { - url: "https://github.com/ms903-github/MixMatch-imdb" - owner: "ms903-github" + url: "https://github.com/atinghosh/mixmatch_pytorch" + owner: "atinghosh" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Implementation of MixMatch for semi supervised Learning on CIFAR10, SVHN, STL10 " } repositories: { - url: "https://github.com/yuxi120407/semi-supervised_tensorflow2.0" + url: "https://github.com/yuxi120407/mixmatch_tensorflow" owner: "yuxi120407" framework: FRAMEWORK_TENSORFLOW number_of_stars: 5 description: "This is an Tensorflow implementation of semi-supervised learning with the following methods: Pseudo-label, Pi_model, VAT, mean_teacher, Mixup, ICT and Mixmatch." 
} repositories: { - url: "https://github.com/rit-git/Snippext_public" - owner: "rit-git" + url: "https://github.com/kevinghst/mixmatch" + owner: "kevinghst" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "Snippext: Semi-supervised Opinion Mining with Augmented Data" } repositories: { - url: "https://github.com/ntozer/mixmatch-tensorflow2.0" - owner: "ntozer" + url: "https://github.com/uizard-technologies/realmix" + owner: "uizard-technologies" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 12 - description: "Implementation of \"MixMatch: A Holistic Approach to Semi-Supervised Learning\" in TensorFlow 2.0" + number_of_stars: 59 + description: "Code for the paper: RealMix: Towards Realistic Semi-Supervised Deep Learning Algorithms " + } + repositories: { + url: "https://github.com/viig99/mixmatch-freesound" + owner: "viig99" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Multi label audio classification using mixmatch & a noisy loss" } } video: { video_id: "ud863JQmUW0" video_title: "PR-195: MixMatch: A Holistic Approach to Semi-Supervised Learning" number_of_likes: 23 - number_of_views: 1436 + number_of_views: 1466 published_date: { seconds: 1569160250 } @@ -16304,47 +16459,47 @@ pr_id_to_video: { authors: "Anselm Levskaya" authors: "Jonathon Shlens" repositories: { - url: "https://github.com/MartinGer/Stand-Alone-Self-Attention-in-Vision-Models" - owner: "MartinGer" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Pytorch implementation of the paper Stand-Alone Self-Attention in Vision Models" - } - repositories: { - url: "https://github.com/MaheepChaudhary/Stand-Alone_Self-Attention" - owner: "MaheepChaudhary" + is_official: true + url: "https://github.com/google-research/google-research" + owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 - description: "Implemented the Stand-Alone Self-Attention research paper form scratch in Tensorflow" + number_of_stars: 18790 + description: "Google Research" } repositories: { - url: "https://github.com/The-AI-Summer/self_attention" - owner: "The-AI-Summer" + url: "https://github.com/JoeRoussy/adaptive-attention-in-cv" + owner: "JoeRoussy" framework: FRAMEWORK_PYTORCH - number_of_stars: 478 - description: "Implementation of various self-attention mechanisms focused on computer vision. Ongoing repository. " + number_of_stars: 25 + description: "Implementation for our paper exploring a novel 2D adaptive attention span kernel in computer vision." } repositories: { url: "https://github.com/leaderj1001/Stand-Alone-Self-Attention" owner: "leaderj1001" framework: FRAMEWORK_PYTORCH - number_of_stars: 350 + number_of_stars: 358 description: "Implementing Stand-Alone Self-Attention in Vision Models using Pytorch" } repositories: { - is_official: true - url: "https://github.com/google-research/google-research" - owner: "google-research" + url: "https://github.com/The-AI-Summer/self_attention" + owner: "The-AI-Summer" + framework: FRAMEWORK_PYTORCH + number_of_stars: 513 + description: "Implementation of various self-attention mechanisms focused on computer vision. Ongoing repository. 
" + } + repositories: { + url: "https://github.com/MaheepChaudhary/Stand-Alone_Self-Attention" + owner: "MaheepChaudhary" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 - description: "Google Research" + number_of_stars: 5 + description: "Implemented the Stand-Alone Self-Attention research paper form scratch in Tensorflow" } repositories: { - url: "https://github.com/JoeRoussy/adaptive-attention-in-cv" - owner: "JoeRoussy" + url: "https://github.com/MartinGer/Stand-Alone-Self-Attention-in-Vision-Models" + owner: "MartinGer" framework: FRAMEWORK_PYTORCH - number_of_stars: 25 - description: "Implementation for our paper exploring a novel 2D adaptive attention span kernel in computer vision." + number_of_stars: 1 + description: "Pytorch implementation of the paper Stand-Alone Self-Attention in Vision Models" } methods: { name: "Convolution" @@ -16401,7 +16556,7 @@ pr_id_to_video: { video_id: "6hadVw4Sy2M" video_title: "PR-196: Stand Alone Self Attention in Vision Models" number_of_likes: 9 - number_of_views: 1611 + number_of_views: 1658 published_date: { seconds: 1571072079 } @@ -16429,7 +16584,7 @@ pr_id_to_video: { url: "https://github.com/varungohil/Generalizing-Lottery-Tickets" owner: "varungohil" framework: FRAMEWORK_PYTORCH - number_of_stars: 42 + number_of_stars: 43 description: "This repository contains code to replicate the experiments given in NeurIPS 2019 paper \"One ticket to win them all: generalizing lottery ticket initializations across datasets and optimizers\"" } } @@ -16437,7 +16592,7 @@ pr_id_to_video: { video_id: "YmTNpF2OOjA" video_title: "PR-197: One ticket to win them all: generalizing lottery ticket initialization" number_of_likes: 21 - number_of_views: 1019 + number_of_views: 1029 published_date: { seconds: 1569769625 } @@ -16460,26 +16615,6 @@ pr_id_to_video: { authors: "Ji Lin" authors: "Chuang Gan" authors: "Song Han" - repositories: { - url: "https://github.com/open-mmlab/mmaction2" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1016 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" - } - repositories: { - url: "https://github.com/rijuldhir/TSM" - owner: "rijuldhir" - framework: FRAMEWORK_PYTORCH - } - repositories: { - is_official: true - url: "https://github.com/MIT-HAN-LAB/temporal-shift-module" - owner: "MIT-HAN-LAB" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1408 - description: "[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding" - } repositories: { url: "https://github.com/WavesUR/embedded_TSM" owner: "WavesUR" @@ -16501,6 +16636,26 @@ pr_id_to_video: { number_of_stars: 1 description: "Course Project for Stanford CS231n Convolutional Neural Networks for Visual Recognition" } + repositories: { + is_official: true + url: "https://github.com/MIT-HAN-LAB/temporal-shift-module" + owner: "MIT-HAN-LAB" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1447 + description: "[ICCV 2019] TSM: Temporal Shift Module for Efficient Video Understanding" + } + repositories: { + url: "https://github.com/rijuldhir/TSM" + owner: "rijuldhir" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/open-mmlab/mmaction2" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1115 + description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + } } } } @@ -16538,7 +16693,7 @@ pr_id_to_video: { url: "https://github.com/MahyarNajibi/SNIPER" owner: "MahyarNajibi" framework: FRAMEWORK_OTHERS - number_of_stars: 2644 + 
number_of_stars: 2643 description: "SNIPER / AutoFocus is an efficient multi-scale object detection training / inference algorithm" } methods: { @@ -16596,7 +16751,7 @@ pr_id_to_video: { video_id: "EkndN7svgUk" video_title: "PR-199: SNIPER:Efficient Multi Scale Training" number_of_likes: 16 - number_of_views: 1399 + number_of_views: 1415 published_date: { seconds: 1570377571 } @@ -16651,8 +16806,8 @@ pr_id_to_video: { video: { video_id: "BHEncY-f548" video_title: "PR-200: Online Model Distillation for Efficient Video Inference" - number_of_likes: 16 - number_of_views: 723 + number_of_likes: 17 + number_of_views: 733 published_date: { seconds: 1571035103 } @@ -16679,79 +16834,87 @@ pr_id_to_video: { authors: "Junyuan Xie" authors: "Mu Li" repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." + url: "https://github.com/sherdencooper/tricks-in-deeplearning" + owner: "sherdencooper" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18 + description: "Using different tricks to improve performance of resetnet. The final accuracy:95.21%" } repositories: { - url: "https://github.com/Tirth27/Skin-Cancer-Classification-using-Deep-Learning" - owner: "Tirth27" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/PaddlePaddle/models" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 6098 + description: "Pre-trained and Reproduced Deep Learning Models (『飞桨』官方模型库,包含多种学术前沿和工业场景验证的深度学习模型)" + } + repositories: { + url: "https://github.com/Dmitrsl/Tools" + owner: "Dmitrsl" + framework: FRAMEWORK_OTHERS number_of_stars: 1 - description: "Classify Skin cancer from the skin lesion images using Image classification. The dataset for the project is obtained from the Kaggle SIIM-ISIC-Melanoma-Classification competition. " } repositories: { - url: "https://github.com/Media-Smart/vedaseg" - owner: "Media-Smart" - framework: FRAMEWORK_PYTORCH - number_of_stars: 381 - description: "A semantic segmentation toolbox based on PyTorch" + url: "https://github.com/tyohei/chainerkfac" + owner: "tyohei" + framework: FRAMEWORK_OTHERS + number_of_stars: 19 + description: "A Chainer extension for K-FAC" } repositories: { - url: "https://github.com/seermer/TensorFlow2-EfficientNetV2" - owner: "seermer" + url: "https://github.com/PistonY/ResidualAttentionNetwork" + owner: "PistonY" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 9 - description: "a TensorFlow2(keras model) implementation of EfficientNetV2" + number_of_stars: 96 + description: "A Gluon implement of Residual Attention Network. Best acc on cifar10-97.78%." 
} repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" + url: "https://github.com/cinastanbean/Pytorch-Multi-Task-Multi-class-Classification" + owner: "cinastanbean" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + number_of_stars: 26 + description: "旨在搭建一个分类问题在Pytorch框架下的通解,批量解决单任务多分类问题、多任务多分类问题。" } repositories: { - url: "https://github.com/qingyuanchen1997/Bag-of-Tricks" - owner: "qingyuanchen1997" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "The reproduction of paper \"Bag of Tricks for Image Classification with Convolutional Neural Networks\" (based on Pyorch)" + url: "https://github.com/PaddlePaddle/PaddleClas" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2166 + description: "A treasure chest for visual recognition powered by PaddlePaddle" } repositories: { - url: "https://github.com/jameswang287/Car-Detection" - owner: "jameswang287" + url: "https://github.com/seominseok0429/pytorch-warmup-cosine-lr" + owner: "seominseok0429" framework: FRAMEWORK_PYTORCH - description: "Using the Stanford cars dataset and PyTorch/Resnet-34 to predict a car's make and model." + number_of_stars: 39 } repositories: { - is_official: true - url: "https://github.com/dmlc/gluon-cv" - owner: "dmlc" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4821 - description: "Gluon CV Toolkit" + url: "https://github.com/TotalVariation/Flattenet" + owner: "TotalVariation" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "A pytorch implementation of Flattenet." } repositories: { - url: "https://github.com/sherdencooper/tricks-in-deeplearning" - owner: "sherdencooper" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18 - description: "Using different tricks to improve performance of resetnet. The final accuracy:95.21%" + url: "https://github.com/pshashk/seesaw-facenet" + owner: "pshashk" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "SeesawFaceNets: sparse and robust face verification model for mobile platform" } - repositories: { - url: "https://github.com/PaddlePaddle/models" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 6037 - description: "Pre-trained and Reproduced Deep Learning Models (『飞桨』官方模型库,包含多种学术前沿和工业场景验证的深度学习模型)" + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
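The Residual Connection entry above translates directly into code: the stacked layers learn $\mathcal{F}(x)$ and the block returns $\mathcal{F}(x) + x$. A minimal sketch in PyTorch, chosen only because the surrounding repositories are tagged FRAMEWORK_PYTORCH; the two-convolution shape of $\mathcal{F}$ is an illustrative choice, not any specific repository's block:

```python
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Returns F(x) + x, where F is a small stack of nonlinear layers."""

    def __init__(self, channels: int):
        super().__init__()
        self.f = nn.Sequential(  # the residual function F(x) := H(x) - x
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.f(x) + x  # identity skip connection

x = torch.randn(2, 64, 32, 32)
assert ResidualBlock(64)(x).shape == x.shape  # the skip requires matching shapes
```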
} methods: { - name: "Nesterov Accelerated Gradient" - full_name: "Nesterov Accelerated Gradient" - description: "**Nesterov Accelerated Gradient** is a momentum-based SGD optimizer that \"looks ahead\" to where the parameters will be to calculate the gradient **ex post** rather than **ex ante**:\r\n\r\n$$ v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta-\\gamma{v\\_{t-1}}\\right) $$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} + v\\_{t}$$\r\n\r\nLike SGD with momentum $\\gamma$ is usually set to $0.9$.\r\n\r\nThe intuition is that the [standard momentum](https://paperswithcode.com/method/sgd-with-momentum) method first computes the gradient at the current location and then takes a big jump in the direction of the updated accumulated gradient. In contrast Nesterov momentum first makes a big jump in the direction of the previous accumulated gradient and then measures the gradient where it ends up and makes a correction. The idea being that it is better to correct a mistake after you have made it. \r\n\r\nImage Source: [Geoff Hinton lecture notes](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)" + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "ResNet-D" + full_name: "ResNet-D" + description: "**ResNet-D** is a modification on the [ResNet](https://paperswithcode.com/method/resnet) architecture that utilises an average pooling tweak for downsampling. The motivation is that in the unmodified ResNet, the 1 × 1 convolution for the downsampling block ignores 3/4 of input feature maps, so this is modified so no information will be ignored" } methods: { name: "Mixup" @@ -16759,9 +16922,9 @@ pr_id_to_video: { description: "**Mixup** is a data augmentation technique that that generates a weighted combinations of random image pairs from the training data. Given two images and their ground truth labels: $\\left(x\\_{i}, y\\_{i}\\right), \\left(x\\_{j}, y\\_{j}\\right)$, a synthetic training example $\\left(\\hat{x}, \\hat{y}\\right)$ is generated as:\r\n\r\n$$ \\hat{x} = \\lambda{x\\_{i}} + \\left(1 − \\lambda\\right){x\\_{j}} $$\r\n$$ \\hat{y} = \\lambda{y\\_{i}} + \\left(1 − \\lambda\\right){y\\_{j}} $$\r\n\r\nwhere $\\lambda \\sim \\text{Beta}\\left(\\alpha = 0.2\\right)$ is independently sampled for each augmented example." 
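The Mixup entry above fully specifies the augmentation, so it can be sketched in a few lines. Assumptions worth flagging: $\text{Beta}\left(\alpha = 0.2\right)$ is read as the symmetric $\text{Beta}(0.2, 0.2)$, partners are paired by permuting the batch, and labels are one-hot; none of these choices come from this database:

```python
import numpy as np

def mixup_batch(x, y, alpha=0.2, rng=None):
    """Mix each (image, one-hot label) pair with a random partner in the batch."""
    rng = rng or np.random.default_rng()
    lam = rng.beta(alpha, alpha, size=len(x))  # one lambda per augmented example
    j = rng.permutation(len(x))                # random partner indices
    lam_x = lam.reshape(-1, 1, 1, 1)           # broadcast over H, W, C
    x_hat = lam_x * x + (1.0 - lam_x) * x[j]
    y_hat = lam[:, None] * y + (1.0 - lam[:, None]) * y[j]
    return x_hat, y_hat

x = np.random.rand(8, 32, 32, 3)                  # a batch of NHWC images
y = np.eye(10)[np.random.randint(0, 10, size=8)]  # one-hot labels
x_hat, y_hat = mixup_batch(x, y)
assert np.allclose(y_hat.sum(axis=1), 1.0)        # mixed labels still sum to 1
```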
} methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { name: "Cosine Annealing" @@ -16769,41 +16932,31 @@ pr_id_to_video: { description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere where $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ account for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" } methods: { - name: "Random Horizontal Flip" - full_name: "Random Horizontal Flip" - description: "**RandomHorizontalFlip** is a type of image data augmentation which horizontally flips a given image with a given probability.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" - } - methods: { - name: "Label Smoothing" - full_name: "Label Smoothing" - description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. 
Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." 
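The Cosine Annealing entry a few methods above gives the schedule in closed form, so a one-function sketch suffices; the variable names mirror the formula's symbols and are otherwise arbitrary:

```python
import math

def cosine_annealing_lr(eta_min: float, eta_max: float, t_cur: float, t_i: float) -> float:
    """eta_t = eta_min + (1/2) * (eta_max - eta_min) * (1 + cos(pi * T_cur / T_i))."""
    return eta_min + 0.5 * (eta_max - eta_min) * (1.0 + math.cos(math.pi * t_cur / t_i))

# Decays from eta_max at the start of a cycle to eta_min at its end;
# resetting t_cur to 0 is the "warm restart" described in the entry.
for epoch in (0, 25, 50):
    print(epoch, cosine_annealing_lr(1e-5, 1e-1, epoch, 50))
```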
+ name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + name: "Xavier Initialization" + full_name: "Xavier Initialization" + description: "**Xavier Initialization**, or **Glorot Initialization**, is an initialization scheme for neural networks. Biases are initialized be 0 and the weights $W\\_{ij}$ at each layer are initialized as:\r\n\r\n$$ W\\_{ij} \\sim U\\left[-\\frac{1}{\\sqrt{n}}, \\frac{1}{\\sqrt{n}}\\right] $$\r\n\r\nWhere $U$ is a uniform distribution and $n$ is the size of the previous layer (number of columns in $W$)." } methods: { - name: "ResNet-D" - full_name: "ResNet-D" - description: "**ResNet-D** is a modification on the [ResNet](https://paperswithcode.com/method/resnet) architecture that utilises an average pooling tweak for downsampling. The motivation is that in the unmodified ResNet, the 1 × 1 convolution for the downsampling block ignores 3/4 of input feature maps, so this is modified so no information will be ignored" + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. 
Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" } } video: { video_id: "D-baIgejA4M" video_title: "PR-201: Bag of Tricks for Image Classification with Convolutional Neural Networks" number_of_likes: 48 - number_of_views: 8847 + number_of_views: 8966 published_date: { seconds: 1571580127 } @@ -16836,7 +16989,7 @@ pr_id_to_video: { video_id: "iR7T3lH20gI" video_title: "PR-202: Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts" number_of_likes: 14 - number_of_views: 1061 + number_of_views: 1090 published_date: { seconds: 1571582247 } @@ -16861,39 +17014,25 @@ pr_id_to_video: { authors: "Tsung-Yi Lin" authors: "Yang Song" authors: "Serge Belongie" - repositories: { - url: "https://github.com/tiagoCuervo/JapaNet" - owner: "tiagoCuervo" - framework: FRAMEWORK_TENSORFLOW - description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" - } - repositories: { - is_official: true - url: "https://github.com/richardaecn/class-balanced-loss" - owner: "richardaecn" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 455 - description: "Class-Balanced Loss Based on Effective Number of Samples. CVPR 2019" - } repositories: { url: "https://github.com/frgfm/Holocron" owner: "frgfm" framework: FRAMEWORK_PYTORCH - number_of_stars: 118 + number_of_stars: 125 description: "PyTorch implementations of recent Computer Vision tricks (ReXNet, RepVGG, Unet3p, YOLOv4, CIoU loss, AdaBelief)" } repositories: { url: "https://github.com/vandit15/Class-balanced-loss-pytorch" owner: "vandit15" framework: FRAMEWORK_PYTORCH - number_of_stars: 517 + number_of_stars: 534 description: "Pytorch implementation of the paper \"Class-Balanced Loss Based on Effective Number of Samples\"" } repositories: { url: "https://github.com/statsu1990/yoto_class_balanced_loss" owner: "statsu1990" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + number_of_stars: 9 description: "Unofficial implementation of YOTO (You Only Train Once) applied to Class balanced loss" } repositories: { @@ -16903,12 +17042,26 @@ pr_id_to_video: { number_of_stars: 15 description: "Adjust Decision Boundary for Class Imbalanced Learning" } + repositories: { + is_official: true + url: "https://github.com/richardaecn/class-balanced-loss" + owner: "richardaecn" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 467 + description: "Class-Balanced Loss Based on Effective Number of Samples. 
CVPR 2019" + } + repositories: { + url: "https://github.com/tiagoCuervo/JapaNet" + owner: "tiagoCuervo" + framework: FRAMEWORK_TENSORFLOW + description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" + } } video: { video_id: "3hL0uVtJrXM" video_title: "PR-203 : Class-Balanced Loss Based on Effective Number of Samples" number_of_likes: 15 - number_of_views: 1301 + number_of_views: 1330 published_date: { seconds: 1572183724 } @@ -16935,18 +17088,6 @@ pr_id_to_video: { authors: "Phil Bachman" authors: "Adam Trischler" authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/jqhoogland/rgpy" - owner: "jqhoogland" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Renormalization Group techniques implemented in python with special emphasis on Machine Learning-inspired methods." - } - repositories: { - url: "https://github.com/jtlai0921/infomax" - owner: "jtlai0921" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/HolenYHR/Deepinfo_pytorch" owner: "HolenYHR" @@ -16961,19 +17102,26 @@ pr_id_to_video: { number_of_stars: 118 description: "extract features by maximizing mutual information" } + repositories: { + url: "https://github.com/jqhoogland/rgpy" + owner: "jqhoogland" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Renormalization Group techniques implemented in python with special emphasis on Machine Learning-inspired methods." + } repositories: { is_official: true url: "https://github.com/rdevon/DIM" owner: "rdevon" framework: FRAMEWORK_PYTORCH - number_of_stars: 654 + number_of_stars: 662 description: "Deep InfoMax (DIM), or \"Learning Deep Representations by Mutual Information Estimation and Maximization\"" } repositories: { url: "https://github.com/DuaneNielsen/DeepInfomaxPytorch" owner: "DuaneNielsen" framework: FRAMEWORK_PYTORCH - number_of_stars: 237 + number_of_stars: 239 description: "Learning deep representations by mutual information estimation and maximization" } repositories: { @@ -16996,12 +17144,17 @@ pr_id_to_video: { number_of_stars: 13 description: "Code for the paper: Learning Adversarially Robust Representations via Worst-Case Mutual Information Maximization (https://arxiv.org/abs/2002.11798)" } + repositories: { + url: "https://github.com/jtlai0921/infomax" + owner: "jtlai0921" + framework: FRAMEWORK_TENSORFLOW + } } video: { video_id: "YNicvevmByo" video_title: "PR-204: Learning deep representations by mutual information estimation and maximization" - number_of_likes: 30 - number_of_views: 2517 + number_of_likes: 32 + number_of_views: 2586 published_date: { seconds: 1572789342 } @@ -17026,23 +17179,11 @@ pr_id_to_video: { authors: "Zsolt Kira" authors: "Yu-Chiang Frank Wang" authors: "Jia-Bin Huang" - repositories: { - url: "https://github.com/mikehuisman/revisiting-learned-optimizers" - owner: "mikehuisman" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/yinboc/few-shot-meta-baseline" - owner: "yinboc" - framework: FRAMEWORK_PYTORCH - number_of_stars: 307 - description: "A New Meta-Baseline for Few-Shot Learning" - } repositories: { url: "https://github.com/cyvius96/few-shot-meta-baseline" owner: "cyvius96" framework: FRAMEWORK_PYTORCH - number_of_stars: 307 + number_of_stars: 314 description: "A New Meta-Baseline for Few-Shot Learning" } repositories: { @@ -17050,15 +17191,27 @@ pr_id_to_video: { url: 
"https://github.com/wyharveychen/CloserLookFewShot" owner: "wyharveychen" framework: FRAMEWORK_PYTORCH - number_of_stars: 843 + number_of_stars: 849 description: "source code to ICLR'19, 'A Closer Look at Few-shot Classification' " } + repositories: { + url: "https://github.com/yinboc/few-shot-meta-baseline" + owner: "yinboc" + framework: FRAMEWORK_PYTORCH + number_of_stars: 314 + description: "A New Meta-Baseline for Few-Shot Learning" + } + repositories: { + url: "https://github.com/mikehuisman/revisiting-learned-optimizers" + owner: "mikehuisman" + framework: FRAMEWORK_PYTORCH + } } video: { video_id: "yyqZ1K5u2_8" video_title: "PR-205: A Closer Look at Few Shot Classification" - number_of_likes: 27 - number_of_views: 2190 + number_of_likes: 28 + number_of_views: 2238 published_date: { seconds: 1573496397 } @@ -17081,73 +17234,73 @@ pr_id_to_video: { authors: "Shaoshuai Shi" authors: "Xiaogang Wang" authors: "Hongsheng Li" - repositories: { - url: "https://github.com/cxy1997/3D_adapt_auto_driving" - owner: "cxy1997" - framework: FRAMEWORK_PYTORCH - number_of_stars: 55 - } - repositories: { - url: "https://github.com/direcf/pointrcnn_multiclass" - owner: "direcf" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "PointRCNN_multiclass" - } - repositories: { - url: "https://github.com/jskim808/js_pointrcnn" - owner: "jskim808" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/carterprice2/Deep_Learning_project" - owner: "carterprice2" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Modified 3D object detectors [F-ConvNet and PointRCNN] for Car detection on Kitti dataset" - } repositories: { is_official: true url: "https://github.com/sshaoshuai/PointRCNN" owner: "sshaoshuai" framework: FRAMEWORK_PYTORCH - number_of_stars: 1267 + number_of_stars: 1280 description: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud, CVPR 2019." } repositories: { url: "https://github.com/ModelBunker/PointRCNN-PyTorch" owner: "ModelBunker" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 6 description: "PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" } repositories: { url: "https://github.com/sshaoshuai/Pointnet2.PyTorch" owner: "sshaoshuai" framework: FRAMEWORK_PYTORCH - number_of_stars: 291 + number_of_stars: 292 description: "A faster implementation of PointNet++ based on PyTorch." } repositories: { url: "https://github.com/sshaoshuai/PointCloudDet3D" owner: "sshaoshuai" framework: FRAMEWORK_PYTORCH - number_of_stars: 1822 + number_of_stars: 1897 description: "OpenPCDet Toolbox for LiDAR-based 3D Object Detection." } repositories: { url: "https://github.com/open-mmlab/OpenPCDet" owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 1822 + number_of_stars: 1897 description: "OpenPCDet Toolbox for LiDAR-based 3D Object Detection." 
} + repositories: { + url: "https://github.com/carterprice2/Deep_Learning_project" + owner: "carterprice2" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Modified 3D object detectors [F-ConvNet and PointRCNN] for Car detection on Kitti dataset" + } + repositories: { + url: "https://github.com/jskim808/js_pointrcnn" + owner: "jskim808" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/cxy1997/3D_adapt_auto_driving" + owner: "cxy1997" + framework: FRAMEWORK_PYTORCH + number_of_stars: 58 + } + repositories: { + url: "https://github.com/direcf/pointrcnn_multiclass" + owner: "direcf" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "PointRCNN_multiclass" + } } video: { video_id: "sFN_EgCsNzM" video_title: "PR-206: PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud" number_of_likes: 39 - number_of_views: 2833 + number_of_views: 2970 published_date: { seconds: 1573396201 } @@ -17170,68 +17323,70 @@ pr_id_to_video: { authors: "Joseph Redmon" authors: "Ali Farhadi" repositories: { - url: "https://github.com/ArtLabss/tennis-tracking" - owner: "ArtLabss" - framework: FRAMEWORK_OTHERS - number_of_stars: 4 + url: "https://github.com/liulianjushi/yolo_v3_tf2.0" + owner: "liulianjushi" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 } repositories: { - url: "https://github.com/saransh317/YOLOv3-Easy-Implementation" - owner: "saransh317" + url: "https://github.com/RobotMobile/cv-deep-learning-paper-review" + owner: "RobotMobile" framework: FRAMEWORK_OTHERS - number_of_stars: 1 + number_of_stars: 2 } repositories: { - url: "https://github.com/westerndigitalcorporation/YOLOv3-in-PyTorch" - owner: "westerndigitalcorporation" + url: "https://github.com/sergiosonline/transfercv" + owner: "sergiosonline" framework: FRAMEWORK_PYTORCH - number_of_stars: 84 - description: "YOLOv3 in PyTorch with training and inference module implemented." - } - repositories: { - url: "https://github.com/jayin301/2021AutonomousACCar" - owner: "jayin301" - framework: FRAMEWORK_OTHERS + description: "Transfer learning and CV - Drones vs Airplanes" } repositories: { - url: "https://github.com/theAIGuysCode/yolov3_deepsort" - owner: "theAIGuysCode" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 235 - description: "Object tracking implemented with YOLOv3, Deep Sort and Tensorflow." 
+ url: "https://github.com/fofore/yolov3-pyroch" + owner: "fofore" + framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" + url: "https://github.com/lmeulen/PeopleCounter" + owner: "lmeulen" framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + number_of_stars: 6 + description: "Count people in a video stream with YOLO" } repositories: { - url: "https://github.com/DevBruce/YOLOv3-TF2" - owner: "DevBruce" - framework: FRAMEWORK_TENSORFLOW - description: "YOLOv3 implementation with TensorFlow2" + url: "https://github.com/DarkGeekMS/Pytorch-YOLOv3-Implementation" + owner: "DarkGeekMS" + framework: FRAMEWORK_PYTORCH + description: "An Implementation of YOLOv3 Object Detection Architecture using Pytorch Deep Learning Framework" } repositories: { - url: "https://github.com/Qengineering/YoloV3-ncnn-Jetson-Nano" - owner: "Qengineering" - framework: FRAMEWORK_OTHERS + url: "https://github.com/Kev1nZheng/yolov_mask" + owner: "Kev1nZheng" + framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "YoloV3 for Jetson Nano" } repositories: { - url: "https://github.com/CRIGIM/darknet" - owner: "CRIGIM" + url: "https://github.com/michhar/azureml-keras-yolov3-custom" + owner: "michhar" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Keras implementation of YOLO v3 for object detection with training and deployment in Azure ML." + } + repositories: { + url: "https://github.com/Stick-To/YOLO-TF" + owner: "Stick-To" framework: FRAMEWORK_TENSORFLOW - description: "edited darknet" + number_of_stars: 11 + description: "YOLOv2 YOLOv3 in pure tensorflow" } repositories: { - url: "https://github.com/zgcr/simpleAICV-pytorch-ImageNet-COCO-training" - owner: "zgcr" + url: "https://github.com/vobecant/yolov3_WannaSeeU" + owner: "vobecant" framework: FRAMEWORK_PYTORCH - number_of_stars: 162 - description: "Training examples and results for ImageNet(ILSVRC2012)/COCO2017/VOC2007+VOC2012 datasets.Include ResNet/DarkNet/RegNet/RetinaNet/FCOS/CenterNet/YOLO series." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { name: "RetinaNet" @@ -17263,32 +17418,26 @@ pr_id_to_video: { full_name: "Global Average Pooling" description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. 
Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." } + methods: { + name: "Darknet-53" + full_name: "Darknet-53" + description: "**Darknet-53** is a convolutional neural network that acts as a backbone for the [YOLOv3](https://paperswithcode.com/method/yolov3) object detection approach. The improvements upon its predecessor [Darknet-19](https://paperswithcode.com/method/darknet-19) include the use of residual connections, as well as more layers." + } methods: { name: "Residual Connection" full_name: "Residual Connection" description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "Logistic Regression" - full_name: "Logistic Regression" - description: "**Logistic Regression**, despite its name, is a linear model for classification rather than regression. Logistic regression is also known in the literature as logit regression, maximum-entropy classification (MaxEnt) or the log-linear classifier. In this model, the probabilities describing the possible outcomes of a single trial are modeled using a logistic function.\r\n\r\nSource: [scikit-learn](https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression)\r\n\r\nImage: [Michaelg2015](https://commons.wikimedia.org/wiki/User:Michaelg2015)" - } - methods: { - name: "k-Means Clustering" - full_name: "k-Means Clustering" - description: "**k-Means Clustering** is a clustering algorithm that divides a training set into $k$ different clusters of examples that are near each other. It works by initializing $k$ different centroids {$\\mu\\left(1\\right),\\ldots,\\mu\\left(k\\right)$} to different values, then alternating between two steps until convergence:\r\n\r\n(i) each training example is assigned to cluster $i$ where $i$ is the index of the nearest centroid $\\mu^{(i)}$\r\n\r\n(ii) each centroid $\\mu^{(i)}$ is updated to the mean of all training examples $x^{(j)}$ assigned to cluster $i$.\r\n\r\nText Source: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [scikit-learn](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_digits.html)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "Fast-YOLOv3" + full_name: "Fast-YOLOv3" } } video: { video_id: "HMgcvgRrDcA" video_title: "PR-207: YOLOv3: An Incremental Improvement" - number_of_likes: 117 - number_of_views: 7474 + number_of_likes: 118 + number_of_views: 7718 published_date: { seconds: 1574001134 } @@ -17312,28 +17461,28 @@ pr_id_to_video: { authors: "Xiaohua Zhai" authors: "Lucas Beyer" repositories: { - url: "https://github.com/virtualgraham/sc_patch" - owner: "virtualgraham" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - } - repositories: { - url: "https://github.com/moabitcoin/ssvr" - owner: "moabitcoin" + url: "https://github.com/rickyHong/Puzzle-tensorflow-latest-repl" + owner: "rickyHong" framework: FRAMEWORK_TENSORFLOW - description: "Rotations gonna rotate, potatoes gonna potate." } repositories: { is_official: true url: "https://github.com/google/revisiting-self-supervised" owner: "google" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 332 + number_of_stars: 334 } repositories: { - url: "https://github.com/rickyHong/Puzzle-tensorflow-latest-repl" - owner: "rickyHong" + url: "https://github.com/moabitcoin/ssvr" + owner: "moabitcoin" framework: FRAMEWORK_TENSORFLOW + description: "Rotations gonna rotate, potatoes gonna potate." + } + repositories: { + url: "https://github.com/virtualgraham/sc_patch" + owner: "virtualgraham" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 } methods: { name: "Global Average Pooling" @@ -17389,8 +17538,8 @@ pr_id_to_video: { video: { video_id: "eDDHsbMgOJQ" video_title: "PR-208: Unsupervised Visual Representation Learning Overview:Toward Self-Supervision" - number_of_likes: 62 - number_of_views: 3615 + number_of_likes: 63 + number_of_views: 3738 published_date: { seconds: 1574002434 } @@ -17418,7 +17567,7 @@ pr_id_to_video: { url: "https://github.com/TheShadow29/zsgnet-pytorch" owner: "TheShadow29" framework: FRAMEWORK_PYTORCH - number_of_stars: 56 + number_of_stars: 59 description: "Official implementation of ICCV19 oral paper Zero-Shot grounding of Objects from Natural Language Queries (https://arxiv.org/abs/1908.07129)" } } @@ -17514,7 +17663,7 @@ pr_id_to_video: { url: "https://github.com/zyang-ur/ReSC" owner: "zyang-ur" framework: FRAMEWORK_PYTORCH - number_of_stars: 40 + number_of_stars: 41 description: "Improving One-stage Visual Grounding by Recursive Sub-query Construction, ECCV 2020" } methods: { @@ -17586,7 +17735,7 @@ pr_id_to_video: { url: "https://github.com/ChenyunWu/PhraseCutDataset" owner: "ChenyunWu" framework: FRAMEWORK_OTHERS - number_of_stars: 40 + number_of_stars: 43 description: "Dataset API for \"PhraseCut: Language-based Image Segmentation in the Wild\"" } } @@ -17657,7 +17806,7 @@ pr_id_to_video: { video_id: "P3aod0Ops2I" video_title: "PR-209: Zero-Shot Grounding of Objects from Natural Language Queries" number_of_likes: 5 - number_of_views: 552 + number_of_views: 560 published_date: { seconds: 1575202630 } @@ -17682,11 +17831,11 @@ pr_id_to_video: { authors: "Eduard Hovy" authors: "Quoc V. Le" repositories: { - url: "https://github.com/mhd-medfa/NoisyStudent-Based-Object-Recognition" - owner: "mhd-medfa" + url: "https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet" + owner: "official" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "3rd place solution" + number_of_stars: 4415 + description: "Reference models and tools for Cloud TPUs." 
} repositories: { url: "https://github.com/stanleyjzheng/PyData" @@ -17696,37 +17845,45 @@ pr_id_to_video: { description: "Accompanying notebook and sources to \"A Guide to Pseudolabelling: How to get a Kaggle medal with only one model\" (Dec. 2020 PyData Boston-Cambridge Keynote)" } repositories: { - url: "https://github.com/thomasly/PaperTranslation" - owner: "thomasly" + url: "https://github.com/adventure2165/Summarization_self-training_with_noisy_student_improves_imagenet_classification" + owner: "adventure2165" framework: FRAMEWORK_OTHERS - description: "Translations for collections of English papers" - } - repositories: { - url: "https://github.com/xultaeculcis/coral-net" - owner: "xultaeculcis" - framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } repositories: { - url: "https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet" - owner: "official" + is_official: true + url: "https://github.com/tensorflow/tpu" + owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 + number_of_stars: 4415 description: "Reference models and tools for Cloud TPUs." } - repositories: { - url: "https://github.com/adventure2165/Summarization_self-training_with_noisy_student_improves_imagenet_classification" - owner: "adventure2165" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - } repositories: { is_official: true url: "https://github.com/google-research/noisystudent" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 557 + number_of_stars: 578 description: "Code for Noisy Student Training. https://arxiv.org/abs/1911.04252" } + repositories: { + url: "https://github.com/mhd-medfa/NoisyStudent-Based-Object-Recognition" + owner: "mhd-medfa" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "3rd place solution" + } + repositories: { + url: "https://github.com/xultaeculcis/coral-net" + owner: "xultaeculcis" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/thomasly/PaperTranslation" + owner: "thomasly" + framework: FRAMEWORK_OTHERS + description: "Translations for collections of English papers" + } methods: { name: "RandAugment" full_name: "RandAugment" @@ -17852,7 +18009,7 @@ pr_id_to_video: { } papers: { paper_id: "slimipl-language-model-free-iterative-pseudo" - title: "slimIPL: Language-Model-Free Iterative Pseudo-Labeling" + title: "SlimIPL: Language-Model-Free Iterative Pseudo-Labeling" arxiv_id: "2010.11524" abstract: "Recent results in end-to-end automatic speech recognition have demonstrated the efficacy of pseudo-labeling for semi-supervised models trained both with Connectionist Temporal Classification (CTC) and Sequence-to-Sequence (seq2seq) losses. Iterative Pseudo-Labeling (IPL), which continuously trains a single model using pseudo-labels iteratively re-generated as the model learns, has been shown to further improve performance in ASR. We improve upon the IPL algorithm: as the model learns, we propose to iteratively re-generate transcriptions with hard labels (the most probable tokens), that is, without a language model. We call this approach Language-Model-Free IPL (slimIPL) and give a resultant training setup for low-resource settings with CTC-based models. slimIPL features a dynamic cache for pseudo-labels which reduces sensitivity to changes in relabeling hyperparameters and results in improves training stability. slimIPL is also highly-efficient and requires 3.5-4x fewer computational resources to converge than other state-of-the-art semi/self-supervised approaches. 
With only 10 hours of labeled audio, slimIPL is competitive with self-supervised approaches, and is state-of-the-art with 100 hours of labeled audio without the use of a language model both at test time and during pseudo-label generation." published_date: { @@ -17883,6 +18040,11 @@ pr_id_to_video: { full_name: "Sequence to Sequence" description: "**Seq2Seq**, or **Sequence To Sequence**, is a model used in sequence prediction tasks, such as language modelling and machine translation. The idea is to use one LSTM, the *encoder*, to read the input sequence one timestep at a time, to obtain a large fixed dimensional vector representation (a context vector), and then to use another LSTM, the *decoder*, to extract the output sequence\r\nfrom that vector. The second LSTM is essentially a recurrent neural network language model except that it is conditioned on the input sequence.\r\n\r\n(Note that this page refers to the original seq2seq not general sequence-to-sequence models)" } + methods: { + name: "IPL" + full_name: "Iterative Pseudo-Labeling" + description: "**Iterative Pseudo-Labeling** (IPL) is a semi-supervised algorithm for speech recognition which efficiently performs multiple iterations of pseudo-labeling on unlabeled data as the acoustic model evolves. In particular, IPL fine tunes an existing model at each iteration using both labeled data and a subset of unlabeled data." + } } papers: { paper_id: "improved-noisy-student-training-for-automatic" @@ -17940,7 +18102,7 @@ pr_id_to_video: { url: "https://github.com/bethgelab/robustness" owner: "bethgelab" framework: FRAMEWORK_PYTORCH - number_of_stars: 48 + number_of_stars: 51 description: "Robustness and adaptation of ImageNet scale models. Pre-Release, stay tuned for updates." } methods: { @@ -17998,7 +18160,7 @@ pr_id_to_video: { video_id: "3OqSPvwTkaQ" video_title: "PR-210: Self-training with Noisy Student improves ImageNet classification" number_of_likes: 9 - number_of_views: 1163 + number_of_views: 1181 published_date: { seconds: 1575014400 } @@ -18027,7 +18189,7 @@ pr_id_to_video: { url: "https://github.com/mp2893/mime" owner: "mp2893" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 86 + number_of_stars: 88 description: "MiME Repository" } } @@ -18060,7 +18222,7 @@ pr_id_to_video: { url: "https://github.com/sajaddarabi/TAPER" owner: "sajaddarabi" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + number_of_stars: 9 description: "Patient Code & Text Representation Learning " } repositories: { @@ -18068,7 +18230,7 @@ pr_id_to_video: { url: "https://github.com/sajaddarabi/TAPER-EHR" owner: "sajaddarabi" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + number_of_stars: 9 description: "Patient Code & Text Representation Learning " } methods: { @@ -18245,7 +18407,7 @@ pr_id_to_video: { video: { video_id: "8A7_0zS3h0I" video_title: "PR-211: MiME: Multilevel Medical Embedding of Electronic Health Records for Predictive Healthcare" - number_of_views: 552 + number_of_views: 563 published_date: { seconds: 1575212192 } @@ -18271,14 +18433,14 @@ pr_id_to_video: { url: "https://github.com/google/brain-tokyo-workshop/tree/master/WANNRelease" owner: "master" framework: FRAMEWORK_OTHERS - number_of_stars: 875 + number_of_stars: 878 description: "🧠🗼" } repositories: { url: "https://github.com/IpsumDominum/super-brain-weight-agnostic-neural-networks" owner: "IpsumDominum" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 + number_of_stars: 10 description: "Weight Agnostic Neural Networks (in Python)" } } @@ -18319,67 +18481,71 @@ 
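The IPL entry added above describes the loop only in prose. Below is a deliberately toy sketch of iterative pseudo-labeling with a dynamic cache of hard labels, in the spirit of the slimIPL abstract: scikit-learn's LogisticRegression stands in for the acoustic model, random vectors stand in for audio features, and the shapes, subset size, and iteration count are illustrative assumptions, not the paper's setup:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
# Toy stand-ins: random "features" for a labeled set and an unlabeled pool.
X_lab = rng.normal(size=(100, 8))
y_lab = rng.integers(0, 2, size=100)
X_unlab = rng.normal(size=(1000, 8))

model = LogisticRegression().fit(X_lab, y_lab)
cache = {}  # dynamic cache: pool index -> hard pseudo-label (no language model)

for step in range(10):
    # Re-transcribe only a random slice of the pool each iteration; the rest
    # is served from the cache, which damps sensitivity to relabeling.
    subset = rng.choice(len(X_unlab), size=200, replace=False)
    cache.update(zip(subset.tolist(), model.predict(X_unlab[subset]).tolist()))

    idx = np.array(sorted(cache))
    X = np.vstack([X_lab, X_unlab[idx]])
    y = np.concatenate([y_lab, np.array([cache[i] for i in idx])])
    model = LogisticRegression().fit(X, y)  # retrain on labeled + pseudo-labeled
```

The cache is the detail that separates this from naive pseudo-labeling: only a slice of the unlabeled pool is re-labeled per iteration, so stable hard labels dominate the training mix.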
pr_id_to_video: { authors: "Jonathan Frankle" authors: "Michael Carbin" repositories: { - url: "https://github.com/phiandark/SiftingFeatures" - owner: "phiandark" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Code for the paper \"Sifting out the features by pruning: Are convolutional networks the winning lottery ticket of fully connected ones?\"" - } - repositories: { - url: "https://github.com/hdo0947/Lottery-Ticket-Hypothesis" - owner: "hdo0947" + url: "https://github.com/gcastex/PruNet" + owner: "gcastex" framework: FRAMEWORK_PYTORCH - description: "Project with Jack Weitze" + number_of_stars: 13 + description: "Pruning applied to Facial Recognition." } repositories: { - url: "https://github.com/JingtongSu/sanity-checking-pruning" - owner: "JingtongSu" + url: "https://github.com/reallygooday/60daysofudacity" + owner: "reallygooday" framework: FRAMEWORK_PYTORCH - number_of_stars: 21 - description: "Code for Sanity-Checking Pruning Methods: Random Tickets can Win the Jackpot" + number_of_stars: 7 + description: "Pledged to engage with the topics of SPAIC Program for at least 30 minutes per day for 60 days." } repositories: { - url: "https://github.com/ARMargolis/melanoma-pytorch" - owner: "ARMargolis" + url: "https://github.com/emerali/LottoRBM" + owner: "emerali" framework: FRAMEWORK_PYTORCH - description: "Development of a PyTorch model for Kaggle melanoma competition" + number_of_stars: 2 } repositories: { - url: "https://github.com/zhangtj1996/lottery-ticket-hypothesis-Mxnet" - owner: "zhangtj1996" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "A reimplementation of \"The Lottery Ticket Hypothesis\" (Frankle and Carbin) by Mxnet for FC network." + url: "https://github.com/matthew-mcateer/Keras_pruning" + owner: "matthew-mcateer" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 13 + description: "A walkthrough of how to prune keras models, using both weight-pruning and unit/neuron-pruning." } repositories: { - url: "https://github.com/Taoudi/LotteryTicketHypothesis" - owner: "Taoudi" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Kevin Ammouri and Youssef Taoudi" + url: "https://github.com/facebookresearch/open_lth" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 440 + description: "A repository in preparation for open-sourcing lottery ticket hypothesis code." } repositories: { - url: "https://github.com/COMP6248-Reproducability-Challenge/REPRODUCIBILITY-REPORT-THE-LOTTERY-TICKET-HYPOTHESIS" - owner: "COMP6248-Reproducability-Challenge" + url: "https://github.com/ARMargolis/melanoma-pytorch" + owner: "ARMargolis" framework: FRAMEWORK_PYTORCH + description: "Development of a PyTorch model for Kaggle melanoma competition" } repositories: { - url: "https://github.com/Theys96/lottery-ticket-hypothesis" - owner: "Theys96" + url: "https://github.com/uber-research/deconstructing-lottery-tickets" + owner: "uber-research" framework: FRAMEWORK_TENSORFLOW - description: "Experimentation setup for the \"Lottery Ticket\" hypothesis for neural networks." 
+ number_of_stars: 104 } repositories: { - url: "https://github.com/Happy-Virus-IkBeom/LTH_Tensorflow" - owner: "Happy-Virus-IkBeom" + url: "https://github.com/Mraksu/Lottery-Ticket" + owner: "Mraksu" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Experiments about Lottery Ticket Hypothesis" } repositories: { - url: "https://github.com/kenichdietrich/LotteryTicketHypothesis" - owner: "kenichdietrich" + url: "https://github.com/google-research/lottery-ticket-hypothesis" + owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "Codes to perform LTH with Keras and Tensorflow" + number_of_stars: 625 + description: "A reimplementation of \"The Lottery Ticket Hypothesis\" (Frankle and Carbin) on MNIST." + } + repositories: { + url: "https://github.com/rahulvigneswaran/Lottery-Ticket-Hypothesis-in-Pytorch" + owner: "rahulvigneswaran" + framework: FRAMEWORK_PYTORCH + number_of_stars: 176 + description: "This repository contains a Pytorch implementation of the paper \"The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks\" by Jonathan Frankle and Michael Carbin that can be easily adapted to any model/dataset." } } papers: { @@ -18443,7 +18609,7 @@ pr_id_to_video: { video_id: "7pR_74R4bT0" video_title: "PR-212: Weight Agnostic Neural Networks" number_of_likes: 9 - number_of_views: 492 + number_of_views: 493 published_date: { seconds: 1575817583 } @@ -18467,20 +18633,20 @@ pr_id_to_video: { authors: "Maithra Raghu" authors: "Samy Bengio" authors: "Oriol Vinyals" - repositories: { - url: "https://github.com/fmu2/PyTorch-MAML" - owner: "fmu2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 47 - description: "A PyTorch implementation of Model Agnostic Meta-Learning (MAML) that faithfully reproduces the results from the original paper." - } repositories: { url: "https://github.com/learnables/learn2learn" owner: "learnables" framework: FRAMEWORK_PYTORCH - number_of_stars: 1364 + number_of_stars: 1406 description: "A PyTorch Library for Meta-learning Research" } + repositories: { + url: "https://github.com/fmu2/PyTorch-MAML" + owner: "fmu2" + framework: FRAMEWORK_PYTORCH + number_of_stars: 49 + description: "A PyTorch implementation of Model Agnostic Meta-Learning (MAML) that faithfully reproduces the results from the original paper." + } methods: { name: "MAML" full_name: "Model-Agnostic Meta-Learning" @@ -18533,7 +18699,7 @@ pr_id_to_video: { paper_id: "model-agnostic-learning-to-meta-learn" title: "Model-Agnostic Learning to Meta-Learn" arxiv_id: "2012.02684" - abstract: "In this paper, we propose a learning algorithm that enables a model to quickly exploit commonalities among related tasks from an unseen task distribution, before quickly adapting to specific tasks from that same distribution. We investigate how learning with different task distributions can first improve adaptability by meta-finetuning on related tasks before improving goal task generalization with finetuning. Synthetic regression experiments validate the intuition that learning to meta-learn improves adaptability and consecutively generalization. The methodology, setup, and hypotheses in this proposal were positively evaluated by peer review before conclusive experiments were carried out." + abstract: "In this paper, we propose a learning algorithm that enables a model to quickly exploit commonalities among related tasks from an unseen task distribution, before quickly adapting to specific tasks from that same distribution. 
We investigate how learning with different task distributions can first improve adaptability by meta-finetuning on related tasks before improving goal task generalization with finetuning. Synthetic regression experiments validate the intuition that learning to meta-learn improves adaptability and consecutively generalization. Experiments on more complex image classification, continual regression, and reinforcement learning tasks demonstrate that learning to meta-learn generally improves task-specific adaptation. The methodology, setup, and hypotheses in this proposal were positively evaluated by peer review before conclusive experiments were carried out." published_date: { seconds: 1607040000 } @@ -18595,7 +18761,7 @@ pr_id_to_video: { url: "https://github.com/WangYueFt/rfs" owner: "WangYueFt" framework: FRAMEWORK_PYTORCH - number_of_stars: 255 + number_of_stars: 257 } } papers: { @@ -18634,8 +18800,8 @@ pr_id_to_video: { video: { video_id: "QjejBv33u-E" video_title: "PR-213: Rapid Learning or Feature Reuse? Towards Understanding the Effectiveness of MAML" - number_of_likes: 45 - number_of_views: 1661 + number_of_likes: 47 + number_of_views: 1724 published_date: { seconds: 1575814663 } @@ -18665,43 +18831,24 @@ pr_id_to_video: { authors: "Daniel Cremers" authors: "Thomas Brox" repositories: { - url: "https://github.com/ClementPinard/Pytorch-Correlation-extension" - owner: "ClementPinard" - framework: FRAMEWORK_PYTORCH - number_of_stars: 294 - description: "Custom implementation of Corrleation Module" + url: "https://github.com/philferriere/tfoptflow" + owner: "philferriere" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 439 + description: "Optical Flow Prediction with TensorFlow. Implements \"PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume,\" by Deqing Sun et al. (CVPR 2018)" } repositories: { url: "https://github.com/ClementPinard/FlowNetPytorch" owner: "ClementPinard" framework: FRAMEWORK_PYTORCH - number_of_stars: 656 + number_of_stars: 664 description: "Pytorch implementation of FlowNet by Dosovitskiy et al." } - repositories: { - url: "https://github.com/a-doering/tracker_w_correlation_motion_model" - owner: "a-doering" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Addition to multiple object tracker \"Tracktor\" from \"Tracking without bells and whistles\" paper." - } - repositories: { - url: "https://github.com/remibasaru/Flow-Net" - owner: "remibasaru" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/philferriere/tfoptflow" - owner: "philferriere" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 433 - description: "Optical Flow Prediction with TensorFlow. Implements \"PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume,\" by Deqing Sun et al. 
(CVPR 2018)" - } repositories: { url: "https://github.com/msracver/Deep-Feature-Flow" owner: "msracver" framework: FRAMEWORK_OTHERS - number_of_stars: 1256 + number_of_stars: 1255 description: "Deep Feature Flow for Video Recognition" } repositories: { @@ -18715,7 +18862,7 @@ pr_id_to_video: { url: "https://github.com/guanfuchen/video_obj" owner: "guanfuchen" framework: FRAMEWORK_PYTORCH - number_of_stars: 382 + number_of_stars: 384 description: "基于视频的目标检测算法研究" } repositories: { @@ -18731,6 +18878,27 @@ pr_id_to_video: { number_of_stars: 1 description: "customized chainer's function" } + repositories: { + url: "https://github.com/ClementPinard/Pytorch-Correlation-extension" + owner: "ClementPinard" + framework: FRAMEWORK_PYTORCH + number_of_stars: 299 + description: "Custom implementation of Corrleation Module" + } + repositories: { + url: "https://github.com/ankitAMD/Optical-Flow_Python" + owner: "ankitAMD" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Optical Flow_Python" + } + repositories: { + url: "https://github.com/McDo/LightFlowPytorch" + owner: "McDo" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Pytorch implementation of Light FlowNet" + } } papers: { paper_id: "a-large-dataset-to-train-convolutional" @@ -18747,12 +18915,6 @@ pr_id_to_video: { authors: "Daniel Cremers" authors: "Alexey Dosovitskiy" authors: "Thomas Brox" - repositories: { - url: "https://github.com/adbobes/VideoSuperResolution" - owner: "adbobes" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } repositories: { url: "https://github.com/arashk7/DispNet_Keras" owner: "arashk7" @@ -18764,7 +18926,7 @@ pr_id_to_video: { url: "https://github.com/HKBU-HPML/IRS" owner: "HKBU-HPML" framework: FRAMEWORK_PYTORCH - number_of_stars: 36 + number_of_stars: 37 description: "IRS: A Large Synthetic Indoor Robotics Stereo Dataset for Disparity and Surface Normal Estimation" } repositories: { @@ -18773,6 +18935,12 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH number_of_stars: 76 } + repositories: { + url: "https://github.com/adbobes/VideoSuperResolution" + owner: "adbobes" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } } papers: { paper_id: "devon-deformable-volume-network-for-learning" @@ -18899,15 +19067,15 @@ pr_id_to_video: { url: "https://github.com/twhui/LiteFlowNet2" owner: "twhui" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 211 + number_of_stars: 213 description: "A Lightweight Optical Flow CNN - Revisiting Data Fidelity and Regularization, TPAMI 2020" } } video: { video_id: "Z_t0shK98pM" video_title: "PR-214: FlowNet: Learning Optical Flow with Convolutional Networks" - number_of_likes: 64 - number_of_views: 2960 + number_of_likes: 73 + number_of_views: 3228 published_date: { seconds: 1576422775 } @@ -18930,69 +19098,72 @@ pr_id_to_video: { authors: "Joao Carreira" authors: "Andrew Zisserman" repositories: { - url: "https://github.com/hjchoi-minds/i3dnia" - owner: "hjchoi-minds" + url: "https://github.com/yaohungt/GSTEG_CVPR_2019" + owner: "yaohungt" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 145 + description: "[CVPR'19] [PyTorch] Gated Spatio Temporal Energy Graph" } repositories: { - url: "https://github.com/LukasHedegaard/co3d" - owner: "LukasHedegaard" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/vijayvee/behavior_recognition" + owner: "vijayvee" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 2 - description: "Continual 3D Convolutional Neural Networks" + description: "For this project, I am developing an 
end-to-end trainable behavior recognition system using deep convolutional networks. This project is based on Inception 3D, the state-of-the-art in action recognition." } repositories: { - url: "https://github.com/StanfordVL/RubiksNet" - owner: "StanfordVL" - framework: FRAMEWORK_PYTORCH - number_of_stars: 70 - description: "Official repo for ECCV 2020 paper - RubiksNet: Learnable 3D-Shift for Efficient Video Action Recognition" + url: "https://github.com/deepmind/kinetics-i3d" + owner: "deepmind" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1445 + description: "Convolutional neural network model for video classification trained on the Kinetics dataset." } repositories: { - url: "https://github.com/PPPrior/i3d-pytorch" - owner: "PPPrior" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "I3D Models in PyTorch" + url: "https://github.com/FrederikSchorr/sign-language" + owner: "FrederikSchorr" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70 + description: "Sign Language Recognition for Deaf People" } repositories: { - url: "https://github.com/open-mmlab/mmaction2" - owner: "open-mmlab" + url: "https://github.com/sebastiantiesmeyer/deeplabchop3d" + owner: "sebastiantiesmeyer" framework: FRAMEWORK_PYTORCH - number_of_stars: 1016 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + number_of_stars: 3 + description: "inflated labchop kinetic net" } repositories: { - url: "https://github.com/hassony2/kinetics_i3d_pytorch" - owner: "hassony2" - framework: FRAMEWORK_PYTORCH - number_of_stars: 420 - description: "Inflated i3d network with inception backbone, weights transfered from tensorflow" + url: "https://github.com/vijayvee/behavior-recognition" + owner: "vijayvee" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "For this project, I am developing an end-to-end trainable behavior recognition system using deep convolutional networks. This project is based on Inception 3D, the state-of-the-art in action recognition." } repositories: { - url: "https://github.com/anonymous-p/Flickering_Adversarial_Video" - owner: "anonymous-p" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Code and videos accompanying the paper \"Flickering Adversarial Attacks against Video Recognition Networks\"" + url: "https://github.com/ahsaniqbal/Kinetics-FeatureExtractor" + owner: "ahsaniqbal" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 16 } repositories: { - url: "https://github.com/JeffCHEN2017/WSSTG" - owner: "JeffCHEN2017" - framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "This repository contains the main baselines introduced in WSSTG (ACL 2019)." 
+ url: "https://github.com/OanaIgnat/i3d_keras" + owner: "OanaIgnat" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 36 + description: "I3D implemetation in Keras + video preprocessing + visualization of results" } repositories: { - url: "https://github.com/ShobhitMaheshwari/sign-language1" - owner: "ShobhitMaheshwari" + url: "https://github.com/AbdurrahmanNadi/activity_recognition_web_service" + owner: "AbdurrahmanNadi" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "A REST API web service that employs state of the art deep learning models to perform human activity detection from video" } repositories: { - url: "https://github.com/helloxy96/CS5242_Project2020" - owner: "helloxy96" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/prinshul/GWSDR" + owner: "prinshul" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 14 } } papers: { @@ -19015,7 +19186,7 @@ pr_id_to_video: { url: "https://github.com/skelemoa/quovadis" owner: "skelemoa" framework: FRAMEWORK_OTHERS - number_of_stars: 21 + number_of_stars: 25 description: "Repository for the 'Quo Vadis, Skeleton Action Recognition ?' paper" } } @@ -19077,7 +19248,7 @@ pr_id_to_video: { url: "https://github.com/piergiaj/AViD" owner: "piergiaj" framework: FRAMEWORK_OTHERS - number_of_stars: 42 + number_of_stars: 43 description: "AViD Dataset: Anonymized Videos from Diverse Countries" } } @@ -19115,8 +19286,8 @@ pr_id_to_video: { video: { video_id: "z8YFARv5FrI" video_title: "PR-215: Quo Vadis, Action Recognition?A New Model and the Kinetics Dataset" - number_of_likes: 11 - number_of_views: 1110 + number_of_likes: 12 + number_of_views: 1153 published_date: { seconds: 1576420219 } @@ -19145,73 +19316,67 @@ pr_id_to_video: { authors: "Yanqi Zhou" authors: "Wei Li" authors: "Peter J. Liu" - repositories: { - url: "https://github.com/lesterpjy/numeric-t5" - owner: "lesterpjy" - framework: FRAMEWORK_OTHERS - number_of_stars: 9 - description: "Training T5 to perform numerical reasoning." - } repositories: { url: "https://github.com/lucidrains/x-transformers" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 901 + number_of_stars: 992 description: "A simple but complete full-attention transformer with a set of promising experimental features from various papers" } repositories: { - is_official: true - url: "https://github.com/google-research/text-to-text-transfer-transformer" - owner: "google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3489 - description: "Code for the paper \"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer\"" + url: "https://github.com/abelriboulot/onnxt5" + owner: "abelriboulot" + framework: FRAMEWORK_PYTORCH + number_of_stars: 176 + description: "Summarization, translation, sentiment-analysis, text-generation and more at blazing speed using a T5 version implemented in ONNX." } repositories: { - url: "https://github.com/Yale-LILY/dart" - owner: "Yale-LILY" + url: "https://github.com/safakkbilici/Academic-Paper-Title-Recommendation" + owner: "safakkbilici" framework: FRAMEWORK_OTHERS - number_of_stars: 73 + number_of_stars: 13 + description: "Supervised text summarization (title generation/recommendation) based on academic paper abstracts, with Seq2Seq LSTM and T5." 
} repositories: { - url: "https://github.com/allenai/c4-documentation" - owner: "allenai" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 + url: "https://github.com/wangcongcong123/ttt" + owner: "wangcongcong123" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "A package for fine-tuning Transformers with TPUs, written in Tensorflow2.0+" } repositories: { - url: "https://github.com/Ki6an/fastT5" - owner: "Ki6an" - framework: FRAMEWORK_PYTORCH - number_of_stars: 182 - description: "⚡ boost inference speed of T5 models by 5x & reduce the model size by 3x." + url: "https://github.com/thecodemasterk/Text-to-Text-transfer-transformers" + owner: "thecodemasterk" + framework: FRAMEWORK_TENSORFLOW + description: "Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer" } repositories: { - url: "https://github.com/huggingface/transformers" - owner: "huggingface" - framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 - description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + url: "https://github.com/itzprashu1/prashant" + owner: "itzprashu1" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/LeoLaugier/conditional-auto-encoder-text-to-text-transfer-transformer" - owner: "LeoLaugier" + url: "https://github.com/KAGUYAHONGLAI/SRC" + owner: "KAGUYAHONGLAI" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "Code for CAET5" } repositories: { - url: "https://github.com/safakkbilici/Academic-Paper-Title-Recommendation" - owner: "safakkbilici" - framework: FRAMEWORK_OTHERS - number_of_stars: 13 - description: "Supervised text summarization (title generation/recommendation) based on academic paper abstracts, with Seq2Seq LSTM and the power of Transfer Learning and T5." 
+ url: "https://github.com/franziss/notes" + owner: "franziss" + framework: FRAMEWORK_PYTORCH } repositories: { url: "https://github.com/Nimesh-Patel/text-to-text-transfer-transformer" owner: "Nimesh-Patel" framework: FRAMEWORK_TENSORFLOW } + repositories: { + url: "https://github.com/LeoLaugier/conditional-auto-encoder-text-to-text-transfer-transformer" + owner: "LeoLaugier" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Code for CAET5" + } methods: { name: "Attention Dropout" full_name: "Attention Dropout" @@ -19276,11 +19441,19 @@ pr_id_to_video: { authors: "Dhaval Patel" authors: "Anuradha Bhamidipaty" authors: "Carsten Eickhoff" + repositories: { + is_official: true + url: "https://github.com/gzerveas/mvts_transformer" + owner: "gzerveas" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "Multivariate Time Series Transformer, public version" + } repositories: { url: "https://github.com/timeseriesAI/tsai/blob/main/tsai/models/TST.py" owner: "models" framework: FRAMEWORK_PYTORCH - number_of_stars: 629 + number_of_stars: 712 description: "Time series Timeseries Deep Learning Pytorch fastai - State-of-the-art Deep Learning with Time Series and Sequences in Pytorch / fastai" } } @@ -19317,24 +19490,24 @@ pr_id_to_video: { authors: "Aditya Barua" authors: "Colin Raffel" repositories: { - url: "https://github.com/google-research/byt5" + is_official: true + url: "https://github.com/google-research/multilingual-t5" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 273 + number_of_stars: 706 } repositories: { url: "https://github.com/huggingface/transformers" owner: "huggingface" framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 + number_of_stars: 49984 description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." } repositories: { - is_official: true - url: "https://github.com/google-research/multilingual-t5" + url: "https://github.com/google-research/byt5" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 676 + number_of_stars: 304 } methods: { name: "Attention Dropout" @@ -19397,75 +19570,75 @@ pr_id_to_video: { } authors: "Noam Shazeer" repositories: { - url: "https://github.com/lucidrains/progen" - owner: "lucidrains" - framework: FRAMEWORK_OTHERS + url: "https://github.com/BlinkDL/RWKV-LM" + owner: "BlinkDL" + framework: FRAMEWORK_PYTORCH number_of_stars: 30 - description: "Implementation and replication of ProGen, Language Modeling for Protein Generation, in Jax" - } - repositories: { - url: "https://github.com/lucidrains/progen-jax" - owner: "lucidrains" - framework: FRAMEWORK_OTHERS - number_of_stars: 30 - description: "Implementation and replication of ProGen, Language Modeling for Protein Generation, in Jax" + description: "The RWKV Language Model" } repositories: { url: "https://github.com/lab-ml/nn" owner: "lab-ml" framework: FRAMEWORK_PYTORCH - number_of_stars: 3215 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." 
- } - repositories: { - url: "https://github.com/lucidrains/x-transformers" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 901 - description: "A simple but complete full-attention transformer with a set of promising experimental features from various papers" - } - repositories: { - url: "https://github.com/lucidrains/performer-pytorch" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 649 - description: "An implementation of Performer, a linear attention-based transformer, in Pytorch" + number_of_stars: 3485 + description: "🧑‍🏫 Implementations/tutorials of deep learning papers with side-by-side notes 📝; including transformers (original, xl, switch, feedback, vit), optimizers (adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), 🎮 reinforcement learning (ppo, dqn), capsnet, distillation, etc. 🧠" } repositories: { url: "https://github.com/lucidrains/sinkhorn-transformer" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 170 + number_of_stars: 183 description: "Sinkhorn Transformer - Practical implementation of Sparse Sinkhorn Attention" } repositories: { url: "https://github.com/lucidrains/reformer-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 1538 + number_of_stars: 1568 description: "Reformer, the efficient Transformer, in Pytorch" } repositories: { url: "https://github.com/lucidrains/routing-transformer" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 182 + number_of_stars: 192 description: "Fully featured implementation of Routing Transformer" } repositories: { url: "https://github.com/lucidrains/linear-attention-transformer" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 250 + number_of_stars: 263 description: "Transformer based on a variant of attention that is linear complexity in respect to sequence length" } repositories: { url: "https://github.com/lucidrains/compressive-transformer-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 66 + number_of_stars: 72 description: "Pytorch implementation of Compressive Transformers, from Deepmind" } + repositories: { + url: "https://github.com/lucidrains/progen" + owner: "lucidrains" + framework: FRAMEWORK_OTHERS + number_of_stars: 35 + description: "Implementation and replication of ProGen, Language Modeling for Protein Generation, in Jax" + } + repositories: { + url: "https://github.com/lucidrains/performer-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 687 + description: "An implementation of Performer, a linear attention-based transformer, in Pytorch" + } + repositories: { + url: "https://github.com/lucidrains/x-transformers" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 992 + description: "A simple but complete full-attention transformer with a set of promising experimental features from various papers" + } methods: { name: "Residual Connection" full_name: "Residual Connection" @@ -19631,7 +19804,7 @@ pr_id_to_video: { url: "https://github.com/INK-USC/CALM" owner: "INK-USC" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + number_of_stars: 9 description: "Source code for ICLR 2021 paper : Pre-training Text-to-Text Transformers for Concept-Centric Common Sense" } } @@ -19639,7 +19812,7 @@ pr_id_to_video: { video_id: "Acp17_is9zU" video_title: "PR-216: Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer" number_of_likes: 11 - number_of_views: 725 + number_of_views: 769 published_date: 
{ seconds: 1578239639 } @@ -19663,75 +19836,85 @@ pr_id_to_video: { authors: "Ruoming Pang" authors: "Quoc V. Le" repositories: { - url: "https://github.com/phungpx/efficient_det_pytorch" - owner: "phungpx" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "reimplement efficient det for object detection tasks" + url: "https://github.com/tensorflow/models/tree/master/research/object_detection" + owner: "research" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70934 + description: "Models and examples built with TensorFlow" } repositories: { - url: "https://github.com/Luckygyana/Invo-AI" - owner: "Luckygyana" + url: "https://github.com/signatrix/efficientdet" + owner: "signatrix" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "We propose an extraction system that use knowledge of the types of the target fields to generate extraction candidates, and a neural network architecture that learns a dense representation of each candidate based on neighbouring words in the document. These learned representations are not only useful in solving the extraction task for unseen document templates from two different domains, but are also interpretable in classic document processing." - } - repositories: { - url: "https://github.com/kochlisGit/Custom-Small-Logo-Detection" - owner: "kochlisGit" - framework: FRAMEWORK_TENSORFLOW - description: "Logo Detection of a custom small dataset. The dataset contains logos of 6 famous brands: Nike, Jordans, Adidas, Puma, Kappa, Quicksilver" + number_of_stars: 574 + description: "(Pretrained weights provided) EfficientDet: Scalable and Efficient Object Detection implementation by Signatrix GmbH" } repositories: { - url: "https://github.com/wangermeng2021/EfficientDet-tensorflow2" - owner: "wangermeng2021" + url: "https://github.com/google/automl/tree/master/efficientdet" + owner: "master" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "A Tensorflow2.x implementation of EfficientDet" + number_of_stars: 4502 + description: "Google Brain AutoML" } repositories: { - url: "https://github.com/tensorflow/models/tree/master/research/object_detection" - owner: "research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70578 - description: "Models and examples built with TensorFlow" + url: "https://github.com/Jintao-Huang/EfficientDet_PyTorch" + owner: "Jintao-Huang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 16 + description: "EfficientDet_PyTorch 目标检测(Object Detection)" } repositories: { - url: "https://github.com/djkim3/Yet-Another-EfficientDet-Pytorch" - owner: "djkim3" + url: "https://github.com/toandaominh1997/EfficientDet.Pytorch" + owner: "toandaominh1997" framework: FRAMEWORK_PYTORCH + number_of_stars: 1409 + description: "Implementation EfficientDet: Scalable and Efficient Object Detection in PyTorch" } repositories: { - url: "https://github.com/kushaswani/LitterNet-webapp" - owner: "kushaswani" + url: "https://github.com/HardLaugh/EfficientDet-bifpn" + owner: "HardLaugh" framework: FRAMEWORK_PYTORCH - description: "Web application demonstrating LitterNet" + number_of_stars: 49 + description: "mmdetection-based efficientdet" } repositories: { - url: "https://github.com/lvweiwolf/efficientdet" - owner: "lvweiwolf" + url: "https://github.com/xuannianz/EfficientDet" + owner: "xuannianz" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1192 + description: "EfficientDet (Scalable and Efficient Object Detection) implementation in Keras and Tensorflow" } repositories: { - url: 
"https://github.com/shivam-raj/detector" - owner: "shivam-raj" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/kentaroy47/efficientdet.pytorch" + owner: "kentaroy47" + framework: FRAMEWORK_PYTORCH + number_of_stars: 55 + description: ":neckbeard:Unofficial implementation of EfficientDet" } repositories: { - url: "https://github.com/ravi02512/efficientdet-keras" - owner: "ravi02512" + url: "https://github.com/SweetyTian/efficientdet" + owner: "SweetyTian" + framework: FRAMEWORK_PYTORCH + number_of_stars: 142 + description: "unofficial EffcientDet implemented by mmdetection" + } + repositories: { + is_official: true + url: "https://github.com/google/automl" + owner: "google" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 + number_of_stars: 4502 + description: "Google Brain AutoML" } methods: { - name: "Sigmoid Activation" - full_name: "Sigmoid Activation" - description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + name: "Inverted Residual Block" + full_name: "Inverted Residual Block" + description: "An **Inverted Residual Block**, sometimes called an **MBConv Block**, is a type of residual block used for image models that uses an inverted structure for efficiency reasons. It was originally proposed for the [MobileNetV2](https://paperswithcode.com/method/mobilenetv2) CNN architecture. It has since been reused for several mobile-optimized CNNs.\r\n\r\nA traditional [Residual Block](https://paperswithcode.com/method/residual-block) has a wide -> narrow -> wide structure with the number of channels. The input has a high number of channels, which are compressed with a 1x1 convolution. The number of channels is then increased again with a 1x1 convolution so input and output can be added. \r\n\r\nIn contrast, an Inverted Residual Block follows a narrow -> wide -> narrow approach, hence the inversion. We first widen with a 1x1 convolution, then use a 3x3 depthwise convolution (which greatly reduces the number of parameters), then we use a 1x1 convolution to reduce the number of channels so input and output can be added." } methods: { - name: "Squeeze-and-Excitation Block" - full_name: "Squeeze-and-Excitation Block" - description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." + name: "Image Scale Augmentation" + full_name: "Image Scale Augmentation" + description: "Image Scale Augmentation is an augmentation technique where we randomly pick the short size of a image within a dimension range. One use case of this augmentation technique is in object detectiont asks." 
} methods: { name: "EfficientDet" @@ -19739,39 +19922,39 @@ pr_id_to_video: { description: "**EfficientDet** is a type of object detection model, which utilizes several optimization and backbone tweaks, such as the use of a BiFPN, and a compound scaling method that uniformly scales the resolution,depth and width for all backbones, feature networks and box/class prediction networks at the same time." } methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" } methods: { - name: "Focal Loss" - full_name: "Focal Loss" - description: "A **Focal Loss** function addresses class imbalance during training in tasks like object detection. Focal loss applies a modulating term to the cross entropy loss in order to focus learning on hard negative examples. It is a dynamically scaled cross entropy loss, where the scaling factor decays to zero as confidence in the correct class increases. 
Intuitively, this scaling factor can automatically down-weight the contribution of easy examples during training and rapidly focus the model on hard examples. \r\n\r\nFormally, the Focal Loss adds a factor $(1 - p\\_{t})^\\gamma$ to the standard cross entropy criterion. Setting $\\gamma>0$ reduces the relative loss for well-classified examples ($p\\_{t}>.5$), putting more focus on hard, misclassified examples. Here there is tunable *focusing* parameter $\\gamma \\ge 0$. \r\n\r\n$$ {\\text{FL}(p\\_{t}) = - (1 - p\\_{t})^\\gamma \\log\\left(p\\_{t}\\right)} $$" + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." } methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. 
In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } methods: { - name: "Linear Warmup With Cosine Annealing" - full_name: "Linear Warmup With Cosine Annealing" - description: "**Linear Warmup With Cosine Annealing** is a learning rate schedule where we increase the learning rate linearly for $n$ updates and then anneal according to a cosine schedule afterwards." + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + name: "Cosine Annealing" + full_name: "Cosine Annealing" + description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. 
The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nwhere $\\eta\\_{min}^{i}$ and $\\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" } } papers: { @@ -19791,7 +19974,7 @@ pr_id_to_video: { url: "https://github.com/mahdi65/roadDamageDetection2020" owner: "mahdi65" framework: FRAMEWORK_PYTORCH - number_of_stars: 25 + number_of_stars: 24 description: "repository contain codes for IEEE BigData Cup Challange 2020" } methods: { @@ -19866,13 +20049,6 @@ pr_id_to_video: { authors: "Bijan Najafi" authors: "Justina Wu" authors: "Moi Hoon Yap" - repositories: { - url: "https://github.com/0xc4f3/dfuc2020_snippets" - owner: "0xc4f3" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Code snippets provided alongside with a submission to the DFUC 2020." - } } papers: { paper_id: "asfd-automatic-and-scalable-face-detector" @@ -19925,75 +20101,73 @@ pr_id_to_video: { authors: "Chien-Yao Wang" authors: "Hong-Yuan Mark Liao" repositories: { - url: "https://github.com/wiegehtki/zoneminder-jetson" - owner: "wiegehtki" + url: "https://github.com/Abhi-899/YOLOV4-Custom-Object-Detection" + owner: "Abhi-899" framework: FRAMEWORK_OTHERS - description: "Personen und Gesichtserkennung mit Zoneminder, OpenCV (GPU), YOLO, cuDNN und CUDA" + description: "In this project we will train the YOLOV4 network on 3 classes 'Ambulance' , 'Car' , 'Person' with the Google open image dataset and run the detection on a real video caught on a moving traffic camera" } repositories: { - url: "https://github.com/hunter10bt/DeepLearningFinalPresentation" - owner: "hunter10bt" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/wangermeng2021/Scaled-YOLOv4-tensorflow2" - owner: "wangermeng2021" + url: "https://github.com/Lebhoryi/keras-YOLOv3-model-set" + owner: "Lebhoryi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 24 - description: "A Tensorflow2.x implementation of Scaled-YOLOv4 as described in Scaled-YOLOv4: Scaling Cross Stage Partial Network" + number_of_stars: 1 + description: "转自https://github.com/david8862/keras-YOLOv3-model-set " } repositories: { - url: "https://github.com/FelixFu520/yolov4" - owner: "FelixFu520" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RobotMobile/cv-deep-learning-paper-review" + owner: "RobotMobile" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 } repositories: { - url: "https://github.com/david8862/keras-YOLOv3-model-set" - owner: "david8862" + url: "https://github.com/otamajakusi/darknet-yolov4" + owner: "otamajakusi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 494 - description: "end-to-end YOLOv4/v3/v2 object detection pipeline, implemented on tf.keras with different technologies" } repositories: { - url:
"https://github.com/MEME-Phoenix/Autonomous-Driving-Cart-MEME" - owner: "MEME-Phoenix" - framework: FRAMEWORK_PYTORCH - description: "Autonomous Driving Cart, MEME" + url: "https://github.com/ccie29441/Yolo-v4-and-Yolo-v3-v2-for-Windows-and-Linux" + owner: "ccie29441" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 } repositories: { - url: "https://github.com/Qengineering/YoloV4-ncnn-Jetson-Nano" - owner: "Qengineering" + url: "https://github.com/Dodant/ANPR-with-Yolov4" + owner: "Dodant" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "YoloV4 for Jetson Nano" + number_of_stars: 10 + description: "2020-1 CNU SW Capstone Design Project" } repositories: { - url: "https://github.com/CRIGIM/darknet" - owner: "CRIGIM" + url: "https://github.com/hhk7734/tensorflow-yolov4" + owner: "hhk7734" framework: FRAMEWORK_TENSORFLOW - description: "edited darknet" + number_of_stars: 126 + description: "YOLOv4 Implemented in Tensorflow 2." } repositories: { - url: "https://github.com/ayoungkang/yolov4" - owner: "ayoungkang" + url: "https://github.com/weidalin/yolov4_mixup" + owner: "weidalin" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "yolov4 data augments with mixup" } repositories: { - url: "https://github.com/taeokimeng/object-detection-yolo" - owner: "taeokimeng" + url: "https://github.com/Qengineering/YoloV4-ncnn-Raspberry-Pi-64-bit" + owner: "Qengineering" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Object Detection with YOLO and Streamlit" + number_of_stars: 25 + description: "YoloV4 on a bare Raspberry Pi 4 with ncnn framework" } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + repositories: { + url: "https://github.com/RunzhaoHuang/DeepSort_YOLOV5_OnScreen" + owner: "RunzhaoHuang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } methods: { - name: "Spatial Attention Module" - full_name: "Spatial Attention Module" - description: "A **Spatial Attention Module** is a module for spatial attention in convolutional neural networks. It generates a spatial attention map by utilizing the inter-spatial relationship of features. Different from the [channel attention](https://paperswithcode.com/method/channel-attention-module), the spatial attention focuses on where is an informative part, which is complementary to the channel attention. To compute the spatial attention, we first apply average-pooling and max-pooling operations along the channel axis and concatenate them to generate an efficient feature descriptor. On the concatenated feature descriptor, we apply a convolution layer to generate a spatial attention map $\\textbf{M}\\_{s}\\left(F\\right) \\in \\mathcal{R}^{H×W}$ which encodes where to emphasize or suppress. 
  methods: {
    name: "Bottom-up Path Augmentation"
@@ -20001,19 +20175,24 @@ pr_id_to_video: {
     description: "**Bottom-up Path Augmentation** is a feature extraction technique that seeks to shorten the information path and enhance a feature pyramid with accurate localization signals existing in low-levels. This is based on the fact that high response to edges or instance parts is a strong indicator to accurately localize instances. \r\n\r\nEach building block takes a higher resolution feature map $N\_{i}$ and a coarser map $P\_{i+1}$ through lateral connection and generates the new feature map $N\_{i+1}$. Each feature map $N\_{i}$ first goes through a $3 \times 3$ convolutional layer with stride $2$ to reduce the spatial size. Then each element of feature map $P\_{i+1}$ and the down-sampled map are added through lateral connection. The fused feature map is then processed by another $3 \times 3$ convolutional layer to generate $N\_{i+1}$ for following sub-networks. This is an iterative process and terminates after approaching $P\_{5}$. In these building blocks, we consistently use channel 256 of feature maps. The feature grid for each proposal is then pooled from new feature maps, i.e., {$N\_{2}$, $N\_{3}$, $N\_{4}$, $N\_{5}$}."
   }
   methods: {
-    name: "PAFPN"
-    full_name: "PAFPN"
-    description: "**PAFPN** is a feature pyramid module used in Path Aggregation networks ([PANet](https://paperswithcode.com/method/panet)) that combines FPNs with bottom-up path augmentation, which shortens the information path between lower layers and topmost feature."
+    name: "k-Means Clustering"
+    full_name: "k-Means Clustering"
+    description: "**k-Means Clustering** is a clustering algorithm that divides a training set into $k$ different clusters of examples that are near each other. It works by initializing $k$ different centroids {$\mu^{(1)},\ldots,\mu^{(k)}$} to different values, then alternating between two steps until convergence:\r\n\r\n(i) each training example is assigned to cluster $i$ where $i$ is the index of the nearest centroid $\mu^{(i)}$.\r\n\r\n(ii) each centroid $\mu^{(i)}$ is updated to the mean of all training examples $x^{(j)}$ assigned to cluster $i$.\r\n\r\nText Source: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [scikit-learn](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_digits.html)"
   }
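The two alternating steps in the k-Means entry above map directly onto a few lines of NumPy. This is an editorial sketch: initializing from k random training points is one common choice, not something the quoted text prescribes.

```python
import numpy as np

def k_means(x, k, n_iters=100, seed=0):
    x = np.asarray(x, dtype=float)
    rng = np.random.default_rng(seed)
    centroids = x[rng.choice(len(x), size=k, replace=False)]
    for _ in range(n_iters):
        # (i) assign every example to the cluster of its nearest centroid
        labels = np.linalg.norm(x[:, None] - centroids[None], axis=2).argmin(axis=1)
        # (ii) move each centroid to the mean of the examples assigned to it
        for i in range(k):
            if np.any(labels == i):
                centroids[i] = x[labels == i].mean(axis=0)
    return centroids, labels

centroids, labels = k_means(np.random.rand(50, 2), k=3)
```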
  methods: {
-    name: "DIoU-NMS"
-    full_name: "DIoU-NMS"
-    description: "**DIoU-NMS** is a type of non-maximum suppression where we use Distance IoU rather than regular IoU, in which the overlap area and the distance between two central points of bounding boxes are simultaneously considered when suppressing redundant boxes.\r\n\r\nIn original NMS, the IoU metric is used to suppress the redundant detection boxes, where the overlap area is the unique factor, often yielding false suppression for the cases with occlusion. With DIoU-NMS, we not only consider the overlap area but also central point distance between two boxes."
+    name: "RoIAlign"
+    full_name: "RoIAlign"
+    description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)."
   }
   methods: {
-    name: "Cosine Annealing"
-    full_name: "Cosine Annealing"
-    description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\eta\_{t} = \eta\_{min}^{i} + \frac{1}{2}\left(\eta\_{max}^{i}-\eta\_{min}^{i}\right)\left(1+\cos\left(\frac{T\_{cur}}{T\_{i}}\pi\right)\right)\r\n$$\r\n\r\nwhere $\eta\_{min}^{i}$ and $\eta\_{max}^{i}$ are ranges for the learning rate, and $T\_{cur}$ accounts for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)"
+    name: "Pointwise Convolution"
+    full_name: "Pointwise Convolution"
+    description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)"
+  }
+  methods: {
+    name: "Softmax"
+    full_name: "Softmax"
+    description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \mid{x}) = \frac{e^{x^{T}w_{j}}}{\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$"
   }
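For the Softmax entry above, a small editorial NumPy check of the quoted formula; the weight matrix here is made up purely for illustration.

```python
import numpy as np

def softmax_probs(x, w):
    # P(y = j | x) = exp(x . w_j) / sum_k exp(x . w_k)
    logits = x @ w                 # one column of w per class
    logits -= logits.max()         # standard numerical-stability shift
    exp = np.exp(logits)
    return exp / exp.sum()

x = np.array([1.0, 2.0])
w = np.array([[0.1, -0.2, 0.3],    # shape (n_features, K classes)
              [0.4, 0.0, -0.1]])
print(softmax_probs(x, w))         # K probabilities that sum to 1
```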
  methods: {
    name: "Weight Decay"
@@ -20021,19 +20200,19 @@ pr_id_to_video: {
     description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\_{2}$ Norm of the weights:\r\n\r\n$$L\_{new}\left(w\right) = L\_{original}\left(w\right) + \lambda{w^{T}w}$$\r\n\r\nwhere $\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al"
   }
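The distinction drawn in the Weight Decay entry above, folding the penalty into the update rule rather than the objective, looks like this in a hedged NumPy sketch; the learning rate and decay coefficient are arbitrary illustrative values.

```python
import numpy as np

def sgd_step(w, grad, lr=0.01, weight_decay=1e-4):
    # decay written directly into the update: w <- w - lr * (grad + lambda * w)
    # (the factor of 2 from differentiating lambda * w^T w is absorbed into lambda)
    return w - lr * (grad + weight_decay * w)

w = np.ones(3)
w = sgd_step(w, grad=np.zeros(3))  # even with a zero loss gradient,
print(w)                           # the weights shrink slightly toward zero
```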
+ name: "Spatial Pyramid Pooling" + full_name: "Spatial Pyramid Pooling" + description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." } methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
} } papers: { @@ -20117,14 +20296,14 @@ pr_id_to_video: { url: "https://github.com/ybkscht/EfficientPose" owner: "ybkscht" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 94 + number_of_stars: 104 } } video: { video_id: "11jDC8uZL0E" video_title: "PR-217: EfficientDet: Scalable and Efficient Object Detection" - number_of_likes: 74 - number_of_views: 5657 + number_of_likes: 78 + number_of_views: 5831 published_date: { seconds: 1578238913 } @@ -20166,7 +20345,7 @@ pr_id_to_video: { url: "https://github.com/zhoubenjia/RAAR3DNet" owner: "zhoubenjia" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + number_of_stars: 5 } methods: { name: "Global Average Pooling" @@ -20240,7 +20419,7 @@ pr_id_to_video: { url: "https://github.com/ZitongYu/3DCDC-NAS" owner: "ZitongYu" framework: FRAMEWORK_PYTORCH - number_of_stars: 25 + number_of_stars: 30 description: "AutoGesture with 3DCDC" } methods: { @@ -20440,8 +20619,8 @@ pr_id_to_video: { video: { video_id: "MuJpHR1CpTc" video_title: "PR-218 : MFAS: Multimodal Fusion Architecture Search" - number_of_likes: 9 - number_of_views: 748 + number_of_likes: 11 + number_of_views: 765 published_date: { seconds: 1579016656 } @@ -20465,9 +20644,11 @@ pr_id_to_video: { authors: "Misko Dzamba" authors: "Jason Yosinski" repositories: { - url: "https://github.com/rickyHong/Hamiltonian-NN-repl" - owner: "rickyHong" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/MilesCranmer/lagrangian_nns" + owner: "MilesCranmer" + framework: FRAMEWORK_OTHERS + number_of_stars: 262 + description: "Lagrangian Neural Networks" } repositories: { url: "https://github.com/ayushgarg31/HNN-Neurips2019" @@ -20475,11 +20656,9 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/MilesCranmer/lagrangian_nns" - owner: "MilesCranmer" - framework: FRAMEWORK_OTHERS - number_of_stars: 261 - description: "Lagrangian Neural Networks" + url: "https://github.com/rickyHong/Hamiltonian-NN-repl" + owner: "rickyHong" + framework: FRAMEWORK_PYTORCH } } papers: { @@ -20589,6 +20768,13 @@ pr_id_to_video: { authors: "Jinkyoo Park" authors: "Atsushi Yamashita" authors: "Hajime Asama" + repositories: { + url: "https://github.com/esclear/ph-nn" + owner: "esclear" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "A TensorFlow implementation of Port-Hamiltonian neural networks" + } repositories: { is_official: true url: "https://github.com/Zymrael/PortHamiltonianNN" @@ -20597,19 +20783,12 @@ pr_id_to_video: { number_of_stars: 13 description: "Port-Hamiltonian Approach to Neural Network Training" } - repositories: { - url: "https://github.com/esclear/ph-nn" - owner: "esclear" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "A TensorFlow implementation of Port-Hamiltonian neural networks" - } } video: { video_id: "uSLdQfuc21k" video_title: "PR-219: Hamiltonian Neural Networks" number_of_likes: 14 - number_of_views: 1075 + number_of_views: 1118 published_date: { seconds: 1578839500 } @@ -20636,7 +20815,7 @@ pr_id_to_video: { url: "https://github.com/xiaolonw/TimeCycle" owner: "xiaolonw" framework: FRAMEWORK_PYTORCH - number_of_stars: 684 + number_of_stars: 685 description: "Learning Correspondence from the Cycle-consistency of Time (CVPR 2019)" } } @@ -20683,6 +20862,13 @@ pr_id_to_video: { authors: "Jonathan Tompson" authors: "Pierre Sermanet" authors: "Andrew Zisserman" + repositories: { + url: "https://github.com/google-research/google-research/tree/master/tcc" + owner: "master" + framework: FRAMEWORK_TENSORFLOW + 
number_of_stars: 18789 + description: "Google Research" + } repositories: { url: "https://github.com/June01/tcc_Temporal_Cycle_Consistency_Loss.pytorch" owner: "June01" @@ -20690,13 +20876,6 @@ pr_id_to_video: { number_of_stars: 9 description: "This is the pytorch version of tcc loss, used in paper 'Temporal Cycle-Consistency Learning'." } - repositories: { - url: "https://github.com/google-research/google-research/tree/master/tcc" - owner: "master" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18410 - description: "Google Research" - } } papers: { paper_id: "aligning-videos-in-space-and-time" @@ -20757,8 +20936,8 @@ pr_id_to_video: { video: { video_id: "5g8-8Hz5x08" video_title: "PR-220: Learning Correspondence from the Cycle-Consistency of Time" - number_of_likes: 10 - number_of_views: 740 + number_of_likes: 11 + number_of_views: 751 published_date: { seconds: 1579481888 } @@ -20785,11 +20964,12 @@ pr_id_to_video: { authors: "Brandon Tran" authors: "Aleksander Madry" repositories: { - url: "https://github.com/lengstrom/gitlinks" - owner: "lengstrom" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 - description: "gitlinks - Git Powered Go-Links! 👴⛓" + is_official: true + url: "https://github.com/MadryLab/robustness" + owner: "MadryLab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 572 + description: "A library for experimenting with, training and evaluating neural networks, with a focus on adversarial robustness." } repositories: { url: "https://github.com/xziyue/MNIST_Features" @@ -20799,12 +20979,11 @@ pr_id_to_video: { description: "Robust and non-robust features extracted from MNIST" } repositories: { - is_official: true - url: "https://github.com/MadryLab/robustness" - owner: "MadryLab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 559 - description: "A library for experimenting with, training and evaluating neural networks, with a focus on adversarial robustness." + url: "https://github.com/lengstrom/gitlinks" + owner: "lengstrom" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "gitlinks - Git Powered Go-Links! 👴⛓" } } papers: { @@ -20926,7 +21105,7 @@ pr_id_to_video: { video_id: "Xx4I0pYLJUM" video_title: "PR-221: Adversarial Examples Are Not Bugs, They Are Features" number_of_likes: 9 - number_of_views: 1003 + number_of_views: 1018 published_date: { seconds: 1579445344 } @@ -20950,28 +21129,28 @@ pr_id_to_video: { authors: "Xiaohua Zhai" authors: "Lucas Beyer" repositories: { - url: "https://github.com/virtualgraham/sc_patch" - owner: "virtualgraham" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - } - repositories: { - url: "https://github.com/moabitcoin/ssvr" - owner: "moabitcoin" + url: "https://github.com/rickyHong/Puzzle-tensorflow-latest-repl" + owner: "rickyHong" framework: FRAMEWORK_TENSORFLOW - description: "Rotations gonna rotate, potatoes gonna potate." } repositories: { is_official: true url: "https://github.com/google/revisiting-self-supervised" owner: "google" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 332 + number_of_stars: 334 } repositories: { - url: "https://github.com/rickyHong/Puzzle-tensorflow-latest-repl" - owner: "rickyHong" + url: "https://github.com/moabitcoin/ssvr" + owner: "moabitcoin" framework: FRAMEWORK_TENSORFLOW + description: "Rotations gonna rotate, potatoes gonna potate." 
+ } + repositories: { + url: "https://github.com/virtualgraham/sc_patch" + owner: "virtualgraham" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 } methods: { name: "Global Average Pooling" @@ -21036,19 +21215,19 @@ pr_id_to_video: { authors: "Dhruv Mahajan" authors: "Abhinav Gupta" authors: "Ishan Misra" - repositories: { - url: "https://github.com/329tyson/fai-ssl-challenge" - owner: "329tyson" - framework: FRAMEWORK_PYTORCH - } repositories: { is_official: true url: "https://github.com/facebookresearch/fair_self_supervision_benchmark" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 550 + number_of_stars: 552 description: "Scaling and Benchmarking Self-Supervised Visual Representation Learning" } + repositories: { + url: "https://github.com/329tyson/fai-ssl-challenge" + owner: "329tyson" + framework: FRAMEWORK_PYTORCH + } } papers: { paper_id: "revisiting-contrastive-methods-for" @@ -21062,12 +21241,19 @@ pr_id_to_video: { authors: "Simon Vandenhende" authors: "Stamatios Georgoulis" authors: "Luc Van Gool" + repositories: { + url: "https://github.com/wvangansbeke/Unsupervised-Classification" + owner: "wvangansbeke" + framework: FRAMEWORK_PYTORCH + number_of_stars: 753 + description: "SCAN: Learning to Classify Images without Labels, incl. SimCLR. [ECCV 2020]" + } repositories: { is_official: true url: "https://github.com/wvangansbeke/Revisiting-Contrastive-SSL" owner: "wvangansbeke" framework: FRAMEWORK_PYTORCH - number_of_stars: 35 + number_of_stars: 48 description: "Revisiting Contrastive Methods for Unsupervised Learning of Visual Representations. [2021]" } methods: { @@ -21127,7 +21313,7 @@ pr_id_to_video: { url: "https://github.com/CupidJay/Scaled-down-self-supervised-learning" owner: "CupidJay" framework: FRAMEWORK_PYTORCH - number_of_stars: 15 + number_of_stars: 16 description: "official pytorch implementation of Rethining Self-supervised Learning: Small is Beautiful." 
} } @@ -21218,8 +21404,8 @@ pr_id_to_video: { video: { video_id: "6pDIfC14By8" video_title: "PR-222: Revisiting Self-Supervised Visual Representation Learning" - number_of_likes: 22 - number_of_views: 1363 + number_of_likes: 23 + number_of_views: 1410 published_date: { seconds: 1580653192 } @@ -21266,25 +21452,25 @@ pr_id_to_video: { authors: "Jingtuo Liu" authors: "Errui Ding" repositories: { - url: "https://github.com/AdamHtooLwin/vigilant" - owner: "AdamHtooLwin" + url: "https://github.com/Podidiving/lgsc-for-fas-pytorch" + owner: "Podidiving" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 25 + description: "Learning Generalized Spoof Cues for FaceAnti-spoofing reimplementation in pytorch" } repositories: { is_official: true url: "https://github.com/vis-var/lgsc-for-fas" owner: "vis-var" framework: FRAMEWORK_PYTORCH - number_of_stars: 176 + number_of_stars: 177 description: "Learning Generalized Spoof Cues for FaceAnti-spoofing" } repositories: { - url: "https://github.com/Podidiving/lgsc-for-fas-pytorch" - owner: "Podidiving" + url: "https://github.com/AdamHtooLwin/vigilant" + owner: "AdamHtooLwin" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "Learning Generalized Spoof Cues for FaceAnti-spoofing reimplementation in pytorch" + number_of_stars: 1 } methods: { name: "Auxiliary Classifier" @@ -21308,7 +21494,7 @@ pr_id_to_video: { url: "https://github.com/yaojieliu/ECCV20-STDN" owner: "yaojieliu" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 107 + number_of_stars: 105 description: "Source code for ECCV 2020 paper: On Disentangling Spoof Trace for Generic Face Anti-Spoofing" } } @@ -21374,7 +21560,7 @@ pr_id_to_video: { url: "https://github.com/ZitongYu/CDCN" owner: "ZitongYu" framework: FRAMEWORK_PYTORCH - number_of_stars: 344 + number_of_stars: 359 description: "Central Difference Convolutional Networks" } } @@ -21395,7 +21581,7 @@ pr_id_to_video: { url: "https://github.com/taylover-pei/SSDG-CVPR2020" owner: "taylover-pei" framework: FRAMEWORK_PYTORCH - number_of_stars: 123 + number_of_stars: 128 description: "Single-Side Domain Generalization for Face Anti-Spoofing, CVPR2020" } methods: { @@ -21427,7 +21613,7 @@ pr_id_to_video: { url: "https://github.com/ZitongYu/CDCN" owner: "ZitongYu" framework: FRAMEWORK_PYTORCH - number_of_stars: 344 + number_of_stars: 359 description: "Central Difference Convolutional Networks" } methods: { @@ -21440,7 +21626,7 @@ pr_id_to_video: { video_id: "V9lY0fDYLE4" video_title: "PR-223: Learning Meta Model for Zero- and Few-shot Face Anti-spoofing" number_of_likes: 3 - number_of_views: 340 + number_of_views: 345 published_date: { seconds: 1580654049 } @@ -21466,32 +21652,18 @@ pr_id_to_video: { authors: "Hanrui Wang" authors: "Li-Jia Li" authors: "Song Han" - repositories: { - is_official: true - url: "https://github.com/mit-han-lab/amc" - owner: "mit-han-lab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 315 - description: "[ECCV 2018] AMC: AutoML for Model Compression and Acceleration on Mobile Devices" - } - repositories: { - url: "https://github.com/seulkiyeom/once-for-all" - owner: "seulkiyeom" - framework: FRAMEWORK_PYTORCH - description: "Transformable NAS (based on OFA network)" - } repositories: { url: "https://github.com/NervanaSystems/distiller" owner: "NervanaSystems" framework: FRAMEWORK_PYTORCH - number_of_stars: 3574 + number_of_stars: 3615 description: "Neural Network Distiller by Intel AI Lab: a Python package for neural network compression research. 
https://intellabs.github.io/distiller" } repositories: { url: "https://github.com/songhan/DSD" owner: "songhan" framework: FRAMEWORK_PYTORCH - number_of_stars: 108 + number_of_stars: 109 description: "DSD model zoo. Better accuracy models from DSD training on Imagenet with same model architecture" } repositories: { @@ -21504,21 +21676,21 @@ pr_id_to_video: { url: "https://github.com/mit-han-lab/once-for-all" owner: "mit-han-lab" framework: FRAMEWORK_PYTORCH - number_of_stars: 1259 + number_of_stars: 1294 description: "[ICLR 2020] Once for All: Train One Network and Specialize it for Efficient Deployment" } repositories: { url: "https://github.com/songhan/SqueezeNet-Residual" owner: "songhan" framework: FRAMEWORK_OTHERS - number_of_stars: 154 + number_of_stars: 155 description: "residual-SqueezeNet" } repositories: { url: "https://github.com/mit-han-lab/haq-release" owner: "mit-han-lab" framework: FRAMEWORK_PYTORCH - number_of_stars: 260 + number_of_stars: 266 description: "[CVPR 2019, Oral] HAQ: Hardware-Aware Automated Quantization with Mixed Precision" } repositories: { @@ -21531,9 +21703,23 @@ pr_id_to_video: { url: "https://github.com/mit-han-lab/ProxylessNAS" owner: "mit-han-lab" framework: FRAMEWORK_PYTORCH - number_of_stars: 1245 + number_of_stars: 1255 description: "[ICLR 2019] ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware" } + repositories: { + url: "https://github.com/seulkiyeom/once-for-all" + owner: "seulkiyeom" + framework: FRAMEWORK_PYTORCH + description: "Transformable NAS (based on OFA network)" + } + repositories: { + is_official: true + url: "https://github.com/mit-han-lab/amc" + owner: "mit-han-lab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 323 + description: "[ECCV 2018] AMC: AutoML for Model Compression and Acceleration on Mobile Devices" + } methods: { name: "VGG-16" full_name: "VGG-16" @@ -21765,8 +21951,8 @@ pr_id_to_video: { video: { video_id: "Jp5E8PxiB_4" video_title: "PR-224: AMC: AutoML for Model Compression and Acceleration on Mobile Devices" - number_of_likes: 12 - number_of_views: 578 + number_of_likes: 14 + number_of_views: 604 published_date: { seconds: 1581584633 } @@ -21792,22 +21978,18 @@ pr_id_to_video: { authors: "Lidia del Rio" authors: "Renato Renner" repositories: { - url: "https://github.com/matlab-deep-learning/Physical-Concepts-Scinet" - owner: "matlab-deep-learning" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "This repository provides implementation of SciNet network described in arXiv:1807.10300v3" - } - repositories: { - url: "https://github.com/abdallaharar/PHYS-490-Project" - owner: "abdallaharar" + url: "https://github.com/fd17/SciNet_PyTorch" + owner: "fd17" framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + description: "A PyTorch implementation of the SciNet Paper" } repositories: { - url: "https://github.com/reginareis/doutorado" - owner: "reginareis" + is_official: true + url: "https://github.com/eth-nn-physics/nn_physical_concepts" + owner: "eth-nn-physics" framework: FRAMEWORK_TENSORFLOW - description: "projetos para o doutorado" + number_of_stars: 132 } repositories: { url: "https://github.com/k-woodruff/PMNSnet" @@ -21815,18 +21997,22 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/fd17/SciNet_PyTorch" - owner: "fd17" + url: "https://github.com/reginareis/doutorado" + owner: "reginareis" + framework: FRAMEWORK_TENSORFLOW + description: "projetos para o doutorado" + } + repositories: { + url: 
"https://github.com/abdallaharar/PHYS-490-Project" + owner: "abdallaharar" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "A PyTorch implementation of the SciNet Paper" } repositories: { - is_official: true - url: "https://github.com/eth-nn-physics/nn_physical_concepts" - owner: "eth-nn-physics" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 131 + url: "https://github.com/matlab-deep-learning/Physical-Concepts-Scinet" + owner: "matlab-deep-learning" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "This repository provides implementation of SciNet network described in arXiv:1807.10300v3" } } papers: { @@ -21924,7 +22110,7 @@ pr_id_to_video: { video_id: "EU5NyvoqRrY" video_title: "PR-225: Discovering Physical Concepts With Neural Networks" number_of_likes: 2 - number_of_views: 317 + number_of_views: 321 published_date: { seconds: 1581258007 } @@ -21947,22 +22133,11 @@ pr_id_to_video: { authors: "Murat Sensoy" authors: "Lance Kaplan" authors: "Melih Kandemir" - repositories: { - url: "https://github.com/ms96590/pytorch-classification-uncertainty" - owner: "ms96590" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/ms96590/-" - owner: "ms96590" - framework: FRAMEWORK_PYTORCH - description: "UQ D" - } repositories: { url: "https://github.com/atilberk/evidential-deep-learning-to-quantify-classification-uncertainty" owner: "atilberk" framework: FRAMEWORK_OTHERS - number_of_stars: 29 + number_of_stars: 31 description: "Work on Evidential Deep Learning to Quantify Classification Uncertainty" } repositories: { @@ -21976,9 +22151,20 @@ pr_id_to_video: { url: "https://github.com/dougbrion/pytorch-classification-uncertainty" owner: "dougbrion" framework: FRAMEWORK_PYTORCH - number_of_stars: 91 + number_of_stars: 99 description: "This repo contains a PyTorch implementation of the paper: \"Evidential Deep Learning to Quantify Classification Uncertainty\"" } + repositories: { + url: "https://github.com/ms96590/-" + owner: "ms96590" + framework: FRAMEWORK_PYTORCH + description: "UQ D" + } + repositories: { + url: "https://github.com/ms96590/pytorch-classification-uncertainty" + owner: "ms96590" + framework: FRAMEWORK_PYTORCH + } } papers: { paper_id: "quantifying-classification-uncertainty-using" @@ -22062,15 +22248,15 @@ pr_id_to_video: { authors: "Niklas Kühl" authors: "Jakob Schöffer" repositories: { - url: "https://github.com/ptrckhmmr/DEAL" - owner: "ptrckhmmr" + is_official: true + url: "https://github.com/DeepLearningResearch/DEAL" + owner: "DeepLearningResearch" framework: FRAMEWORK_TENSORFLOW number_of_stars: 11 } repositories: { - is_official: true - url: "https://github.com/DeepLearningResearch/DEAL" - owner: "DeepLearningResearch" + url: "https://github.com/ptrckhmmr/DEAL" + owner: "ptrckhmmr" framework: FRAMEWORK_TENSORFLOW number_of_stars: 11 } @@ -22114,14 +22300,14 @@ pr_id_to_video: { url: "https://github.com/tongzheng1992/E-CNN-classifier" owner: "tongzheng1992" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 + number_of_stars: 7 description: "This is the available code for the paper `An evidential classifier based on Dempster-Shafer theory and deep learning' (arXiv preprint arXiv:2103.13549)" } } video: { video_id: "07XpNGlN7ko" video_title: "PR-226: Evidential Deep Learning to Quantify Classification Uncertainty" - number_of_views: 951 + number_of_views: 982 published_date: { seconds: 1581863352 } @@ -22156,7 +22342,7 @@ pr_id_to_video: { url: "https://github.com/vuoristo/MMAML-Regression" owner: "vuoristo" 
framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 9 } methods: { name: "MAML" @@ -22186,21 +22372,20 @@ pr_id_to_video: { paper_id: "attribute-modulated-generative-meta-learning" title: "Attribute-Modulated Generative Meta Learning for Zero-Shot Classification" arxiv_id: "2104.10857" - abstract: "Zero-shot learning (ZSL) aims to transfer knowledge from seen classes to semantically related unseen classes, which are absent during training. The promising strategies for ZSL are to synthesize visual features of unseen classes conditioned on semantic side information and to incorporate meta-learning to eliminate the model's inherent bias towards seen classes. Existing meta generative approaches pursue a common model shared across task distributions; in contrast, we aim to construct a generative network adaptive to task characteristics. To this end, we propose the Attribute-Modulated generAtive meta-model for Zero-shot learning (AMAZ). Our model consists of an attribute-aware modulation network and an attribute-augmented generative network. Given unseen classes, the modulation network adaptively modulates the generator by applying task-specific transformations so that the generative network can adapt to highly diverse tasks. Our empirical evaluations on four widely-used benchmarks show that AMAZ improves state-of-the-art methods by 3.8% and 5.1% in ZSL and generalized ZSL settings, respectively, demonstrating the superiority of our method." + abstract: "Zero-shot learning (ZSL) aims to transfer knowledge from seen classes to semantically related unseen classes, which are absent during training. The promising strategies for ZSL are to synthesize visual features of unseen classes conditioned on semantic side information and to incorporate meta-learning to eliminate the model's inherent bias towards seen classes. While existing meta generative approaches pursue a common model shared across task distributions, we aim to construct a generative network adaptive to task characteristics. To this end, we propose an Attribute-Modulated generAtive meta-model for Zero-shot learning (AMAZ). Our model consists of an attribute-aware modulation network, an attribute-augmented generative network, and an attribute-weighted classifier. Given unseen classes, the modulation network adaptively modulates the generator by applying task-specific transformations so that the generative network can adapt to highly diverse tasks. The weighted classifier utilizes the data quality to enhance the training procedure, further improving the model performance. Our empirical evaluations on four widely-used benchmarks show that AMAZ outperforms state-of-the-art methods by 3.8% and 3.1% in ZSL and generalized ZSL settings, respectively, demonstrating the superiority of our method. Our experiments on a zero-shot image retrieval task show AMAZ's ability to synthesize instances that portray real visual characteristics." published_date: { seconds: 1619049600 } authors: "Yun Li" authors: "Zhe Liu" authors: "Lina Yao" - authors: "Xianzhi Wang" - authors: "Can Wang" + authors: "Xiaojun Chang" } papers: { paper_id: "model-agnostic-learning-to-meta-learn" title: "Model-Agnostic Learning to Meta-Learn" arxiv_id: "2012.02684" - abstract: "In this paper, we propose a learning algorithm that enables a model to quickly exploit commonalities among related tasks from an unseen task distribution, before quickly adapting to specific tasks from that same distribution. 
We investigate how learning with different task distributions can first improve adaptability by meta-finetuning on related tasks before improving goal task generalization with finetuning. Synthetic regression experiments validate the intuition that learning to meta-learn improves adaptability and consecutively generalization. The methodology, setup, and hypotheses in this proposal were positively evaluated by peer review before conclusive experiments were carried out." + abstract: "In this paper, we propose a learning algorithm that enables a model to quickly exploit commonalities among related tasks from an unseen task distribution, before quickly adapting to specific tasks from that same distribution. We investigate how learning with different task distributions can first improve adaptability by meta-finetuning on related tasks before improving goal task generalization with finetuning. Synthetic regression experiments validate the intuition that learning to meta-learn improves adaptability and consecutively generalization. Experiments on more complex image classification, continual regression, and reinforcement learning tasks demonstrate that learning to meta-learn generally improves task-specific adaptation. The methodology, setup, and hypotheses in this proposal were positively evaluated by peer review before conclusive experiments were carried out." published_date: { seconds: 1607040000 } @@ -22254,7 +22439,7 @@ pr_id_to_video: { url: "https://github.com/vuoristo/MMAML-Regression" owner: "vuoristo" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 9 } } papers: { @@ -22287,7 +22472,7 @@ pr_id_to_video: { video_id: "R-vMHzNxDv8" video_title: "PR-227: Multimodal Model-Agnostic Meta-Learning via Task-Aware Modulation" number_of_likes: 26 - number_of_views: 833 + number_of_views: 843 published_date: { seconds: 1581864300 } @@ -22309,18 +22494,11 @@ pr_id_to_video: { } authors: "Zhichao Yin" authors: "Jianping Shi" - repositories: { - url: "https://github.com/raunaks13/GeoNet-PyTorch" - owner: "raunaks13" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A working implementation of GeoNet in PyTorch" - } repositories: { url: "https://github.com/yijie0710/GeoNet_pytorch" owner: "yijie0710" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 + number_of_stars: 24 description: " An unofficial PyTorch implementation of GeoNet" } repositories: { @@ -22328,9 +22506,16 @@ pr_id_to_video: { url: "https://github.com/yzcjtr/GeoNet" owner: "yzcjtr" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 656 + number_of_stars: 661 description: "Code for GeoNet: Unsupervised Learning of Dense Depth, Optical Flow and Camera Pose (CVPR 2018)" } + repositories: { + url: "https://github.com/raunaks13/GeoNet-PyTorch" + owner: "raunaks13" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "A working implementation of GeoNet in PyTorch" + } } papers: { paper_id: "unsupervised-joint-learning-of-depth-optical" @@ -22349,7 +22534,7 @@ pr_id_to_video: { url: "https://github.com/jianfenglihg/Unsupervised_geometry" owner: "jianfenglihg" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 8 } methods: { name: "Residual Connection" @@ -22443,7 +22628,7 @@ pr_id_to_video: { url: "https://github.com/jianfenglihg/UnOpticalFlow" owner: "jianfenglihg" framework: FRAMEWORK_PYTORCH - number_of_stars: 19 + number_of_stars: 21 description: "Occlusion Aware Unsupervised Learning of Optical Flow From Video" } } @@ -22476,7 +22661,7 @@ pr_id_to_video: { url: 
"https://github.com/saic-vul/odometry" owner: "saic-vul" framework: FRAMEWORK_OTHERS - number_of_stars: 36 + number_of_stars: 37 description: "Training Deep SLAM on Single Frames https://arxiv.org/abs/1912.05405" } } @@ -22520,7 +22705,7 @@ pr_id_to_video: { url: "https://github.com/chenxuluo/EPC" owner: "chenxuluo" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 21 + number_of_stars: 22 description: "Every Pixel Counts ++: Joint Learning of Geometry and Motion with 3D Holistic Understanding" } } @@ -22539,15 +22724,15 @@ pr_id_to_video: { url: "https://github.com/wrlife/RNN_depth_pose" owner: "wrlife" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 31 + number_of_stars: 32 description: "Recurrent Neural Network for (Un-)supervised Learning of Monocular VideoVisual Odometry and Depth" } } video: { video_id: "QO3YKTyQLRs" video_title: "PR-228: Geonet: Unsupervised learning of dense depth, optical flow and camera pose" - number_of_likes: 22 - number_of_views: 838 + number_of_likes: 26 + number_of_views: 881 published_date: { seconds: 1582468376 } @@ -22571,26 +22756,26 @@ pr_id_to_video: { authors: "Haoqi Fan" authors: "Jitendra Malik" authors: "Kaiming He" - repositories: { - url: "https://github.com/wangxiang1230/SSTAP" - owner: "wangxiang1230" - framework: FRAMEWORK_PYTORCH - number_of_stars: 13 - description: "Code for our CVPR 2021 Paper \"Self-Supervised Learning for Semi-Supervised Temporal Action Proposal\"." - } repositories: { url: "https://github.com/facebookresearch/pytorchvideo" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1644 + number_of_stars: 1723 description: "A deep learning library for video understanding research." } repositories: { - url: "https://github.com/LukasHedegaard/co3d" - owner: "LukasHedegaard" + url: "https://github.com/chaitanyadwivedii/3D-Attention-is-All-You-Need" + owner: "chaitanyadwivedii" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Continual 3D Convolutional Neural Networks" + number_of_stars: 1 + description: "A transformer based video question answering system " + } + repositories: { + url: "https://github.com/open-mmlab/mmaction2" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1115 + description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" } repositories: { url: "https://github.com/youngjun0627/movie-rating" @@ -22602,22 +22787,22 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/SlowFast" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 3909 + number_of_stars: 4061 description: "PySlowFast: video understanding codebase from FAIR for reproducing state-of-the-art video models." } repositories: { - url: "https://github.com/open-mmlab/mmaction2" - owner: "open-mmlab" + url: "https://github.com/LukasHedegaard/co3d" + owner: "LukasHedegaard" framework: FRAMEWORK_PYTORCH - number_of_stars: 1016 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + number_of_stars: 3 + description: "Continual 3D Convolutional Neural Networks" } repositories: { - url: "https://github.com/chaitanyadwivedii/3D-Attention-is-All-You-Need" - owner: "chaitanyadwivedii" + url: "https://github.com/wangxiang1230/SSTAP" + owner: "wangxiang1230" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A transformer based video question answering system " + number_of_stars: 18 + description: "Code for our CVPR 2021 Paper \"Self-Supervised Learning for Semi-Supervised Temporal Action Proposal\"." 
} } papers: { @@ -22638,7 +22823,7 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/SlowFast" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 3909 + number_of_stars: 4061 description: "PySlowFast: video understanding codebase from FAIR for reproducing state-of-the-art video models." } } @@ -22655,51 +22840,51 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/pytorchvideo" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1644 + number_of_stars: 1723 description: "A deep learning library for video understanding research." } repositories: { - url: "https://github.com/LukasHedegaard/co3d" - owner: "LukasHedegaard" + url: "https://github.com/kkahatapitiya/X3D-Multigrid" + owner: "kkahatapitiya" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Continual 3D Convolutional Neural Networks" + number_of_stars: 44 + description: "PyTorch implementation of X3D models with Multigrid training." } repositories: { - is_official: true - url: "https://github.com/facebookresearch/SlowFast" - owner: "facebookresearch" + url: "https://github.com/open-mmlab/mmaction2" + owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 3909 - description: "PySlowFast: video understanding codebase from FAIR for reproducing state-of-the-art video models." - } - repositories: { - url: "https://github.com/Chianugoogidi/X3D-tf" - owner: "Chianugoogidi" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "An implementation of the X3D video recognition architecture in TensorFlow/Keras" + number_of_stars: 1115 + description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" } repositories: { url: "https://github.com/ZJCV/X3D" owner: "ZJCV" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 8 description: "[CVPR 2020] X3D: Expanding Architectures for Efficient Video Recognition" } repositories: { - url: "https://github.com/open-mmlab/mmaction2" - owner: "open-mmlab" + url: "https://github.com/Chianugoogidi/X3D-tf" + owner: "Chianugoogidi" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "An implementation of the X3D video recognition architecture in TensorFlow/Keras" + } + repositories: { + is_official: true + url: "https://github.com/facebookresearch/SlowFast" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1016 - description: "OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark" + number_of_stars: 4061 + description: "PySlowFast: video understanding codebase from FAIR for reproducing state-of-the-art video models." } repositories: { - url: "https://github.com/kkahatapitiya/X3D-Multigrid" - owner: "kkahatapitiya" + url: "https://github.com/LukasHedegaard/co3d" + owner: "LukasHedegaard" framework: FRAMEWORK_PYTORCH - number_of_stars: 40 - description: "PyTorch implementation of X3D models with Multigrid training." 
+ number_of_stars: 3 + description: "Continual 3D Convolutional Neural Networks" } } papers: { @@ -22713,21 +22898,21 @@ pr_id_to_video: { authors: "Gilad Sharir" authors: "Asaf Noy" authors: "Lihi Zelnik-Manor" + repositories: { + url: "https://github.com/lucidrains/STAM-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 86 + description: "Implementation of STAM (Space Time Attention Model), a pure and simple attention model that reaches SOTA for video classification" + } repositories: { is_official: true url: "https://github.com/Alibaba-MIIL/STAM" owner: "Alibaba-MIIL" framework: FRAMEWORK_PYTORCH - number_of_stars: 161 + number_of_stars: 166 description: "Official implementation of \"An Image is Worth 16x16 Words, What is a Video Worth?\" (2021 paper) " } - repositories: { - url: "https://github.com/lucidrains/STAM-pytorch" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 85 - description: "Implementation of STAM (Space Time Attention Model), a pure and simple attention model that reaches SOTA for video classification" - } methods: { name: "Convolution" full_name: "Convolution" @@ -22778,7 +22963,7 @@ pr_id_to_video: { paper_id: "video-transformer-network" title: "Video Transformer Network" arxiv_id: "2102.00719" - abstract: "This paper presents VTN, a transformer-based framework for video recognition. Inspired by recent developments in vision transformers, we ditch the standard approach in video action recognition that relies on 3D ConvNets and introduce a method that classifies actions by attending to the entire video sequence information. Our approach is generic and builds on top of any given 2D spatial network. In terms of wall runtime, it trains $16.1\\times$ faster and runs $5.1\\times$ faster during inference while maintaining competitive accuracy compared to other state-of-the-art methods. It enables whole video analysis, via a single end-to-end pass, while requiring $1.5\\times$ fewer GFLOPs. We report competitive results on Kinetics-400 and present an ablation study of VTN properties and the trade-off between accuracy and inference speed. We hope our approach will serve as a new baseline and start a fresh line of research in the video recognition domain. Code and models will be available soon." + abstract: "This paper presents VTN, a transformer-based framework for video recognition. Inspired by recent developments in vision transformers, we ditch the standard approach in video action recognition that relies on 3D ConvNets and introduce a method that classifies actions by attending to the entire video sequence information. Our approach is generic and builds on top of any given 2D spatial network. In terms of wall runtime, it trains $16.1\\times$ faster and runs $5.1\\times$ faster during inference while maintaining competitive accuracy compared to other state-of-the-art methods. It enables whole video analysis, via a single end-to-end pass, while requiring $1.5\\times$ fewer GFLOPs. We report competitive results on Kinetics-400 and present an ablation study of VTN properties and the trade-off between accuracy and inference speed. We hope our approach will serve as a new baseline and start a fresh line of research in the video recognition domain. 
Code and models are available at: https://github.com/bomri/SlowFast/blob/master/projects/vtn/README.md" published_date: { seconds: 1612137600 } @@ -22788,10 +22973,10 @@ pr_id_to_video: { authors: "Dotan Asselmann" repositories: { is_official: true - url: "https://github.com/bomri/SlowFast" - owner: "bomri" + url: "https://github.com/bomri/SlowFast/blob/master/projects/vtn/README.md" + owner: "vtn" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 8 description: "PySlowFast: video understanding codebase from FAIR for reproducing state-of-the-art video models." } methods: { @@ -22818,15 +23003,15 @@ pr_id_to_video: { url: "https://github.com/Sense-X/X-Temporal" owner: "Sense-X" framework: FRAMEWORK_PYTORCH - number_of_stars: 441 + number_of_stars: 440 description: "A general video understanding codebase from SenseTime X-Lab" } } video: { video_id: "nOdnHhco39E" video_title: "PR-229: SlowFast Networks for Video Recognition" - number_of_likes: 25 - number_of_views: 1930 + number_of_likes: 29 + number_of_views: 2031 published_date: { seconds: 1582493685 } @@ -22850,30 +23035,18 @@ pr_id_to_video: { authors: "Łukasz Kaiser" authors: "Anselm Levskaya" repositories: { - url: "https://github.com/sliao-mi-luku/NLP-Chatbot-Reformer-Trax" - owner: "sliao-mi-luku" - framework: FRAMEWORK_OTHERS - description: "Building a chatbot using the Reformer model by Trax" - } - repositories: { - url: "https://github.com/kiss2smiles/nlp_reading" - owner: "kiss2smiles" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - } - repositories: { - url: "https://github.com/lucidrains/DALLE-pytorch" + url: "https://github.com/lucidrains/reformer-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 3154 - description: "Implementation / replication of DALL-E, OpenAI's Text to Image Transformer, in Pytorch" + number_of_stars: 1568 + description: "Reformer, the efficient Transformer, in Pytorch" } repositories: { - url: "https://github.com/huggingface/transformers" - owner: "huggingface" + url: "https://github.com/Rick-McCoy/Reformer-pytorch" + owner: "Rick-McCoy" framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 - description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + number_of_stars: 69 + description: "Implements Reformer: The Efficient Transformer in pytorch." 
} repositories: { url: "https://github.com/jpata/SparseDistance" @@ -22882,6 +23055,14 @@ pr_id_to_video: { number_of_stars: 1 description: "Generate sparse distance matrices with gradients in tensorflow efficiently" } + repositories: { + is_official: true + url: "https://github.com/google/trax/tree/master/trax/models/reformer" + owner: "models" + framework: FRAMEWORK_OTHERS + number_of_stars: 6382 + description: "Trax — Deep Learning with Clear Code and Speed" + } repositories: { url: "https://github.com/lucashueda/long_sentence_transformer" owner: "lucashueda" @@ -22890,26 +23071,30 @@ pr_id_to_video: { description: "Experiments with long sentences Transformers methods (LongFormer and Reformer)" } repositories: { - url: "https://github.com/lucidrains/reformer-pytorch" + url: "https://github.com/lucidrains/DALLE-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 1538 - description: "Reformer, the efficient Transformer, in Pytorch" + number_of_stars: 3268 + description: "Implementation / replication of DALL-E, OpenAI's Text to Image Transformer, in Pytorch" } repositories: { - url: "https://github.com/Rick-McCoy/Reformer-pytorch" - owner: "Rick-McCoy" - framework: FRAMEWORK_PYTORCH - number_of_stars: 68 - description: "Implements Reformer: The Efficient Transformer in pytorch." + url: "https://github.com/kiss2smiles/nlp_reading" + owner: "kiss2smiles" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 } repositories: { - is_official: true - url: "https://github.com/google/trax/tree/master/trax/models/reformer" - owner: "models" + url: "https://github.com/sliao-mi-luku/NLP-Chatbot-Reformer-Trax" + owner: "sliao-mi-luku" framework: FRAMEWORK_OTHERS - number_of_stars: 6300 - description: "Trax — Deep Learning with Clear Code and Speed" + description: "Building a chatbot using the Reformer model by Trax" + } + repositories: { + url: "https://github.com/huggingface/transformers" + owner: "huggingface" + framework: FRAMEWORK_PYTORCH + number_of_stars: 49984 + description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." 
} methods: { name: "GELU" @@ -23043,23 +23228,11 @@ pr_id_to_video: { authors: "Liu Yang" authors: "Sebastian Ruder" authors: "Donald Metzler" - repositories: { - url: "https://github.com/guyd1995/lra-benchmark" - owner: "guyd1995" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/dar-tau/lra-benchmark" - owner: "dar-tau" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } repositories: { url: "https://github.com/google-research/bigbird" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 279 + number_of_stars: 293 description: "Transformers for Longer Sequences" } repositories: { @@ -23067,9 +23240,21 @@ pr_id_to_video: { url: "https://github.com/google-research/long-range-arena" owner: "google-research" framework: FRAMEWORK_OTHERS - number_of_stars: 302 + number_of_stars: 329 description: "Long Range Arena for Benchmarking Efficient Transformers" } + repositories: { + url: "https://github.com/dar-tau/lra-benchmark" + owner: "dar-tau" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } + repositories: { + url: "https://github.com/guyd1995/lra-benchmark" + owner: "guyd1995" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } methods: { name: "Residual Connection" full_name: "Residual Connection" @@ -23134,33 +23319,33 @@ pr_id_to_video: { authors: "Madian Khabsa" authors: "Han Fang" authors: "Hao Ma" - repositories: { - url: "https://github.com/microsoft/vision-longformer" - owner: "microsoft" - framework: FRAMEWORK_PYTORCH - number_of_stars: 103 - } - repositories: { - url: "https://github.com/The-AI-Summer/self-attention-cv" - owner: "The-AI-Summer" - framework: FRAMEWORK_PYTORCH - number_of_stars: 478 - description: "Implementation of various self-attention mechanisms focused on computer vision. Ongoing repository. " - } repositories: { url: "https://github.com/kuixu/Linear-Multihead-Attention" owner: "kuixu" framework: FRAMEWORK_PYTORCH - number_of_stars: 41 + number_of_stars: 42 description: "Reproducing the Linear Multihead Attention introduced in Linformer paper (Linformer: Self-Attention with Linear Complexity)" } repositories: { url: "https://github.com/tatp22/linformer-pytorch" owner: "tatp22" framework: FRAMEWORK_PYTORCH - number_of_stars: 267 + number_of_stars: 275 description: "My take on a practical implementation of Linformer for Pytorch." } + repositories: { + url: "https://github.com/The-AI-Summer/self-attention-cv" + owner: "The-AI-Summer" + framework: FRAMEWORK_PYTORCH + number_of_stars: 513 + description: "Implementation of various self-attention mechanisms focused on computer vision. Ongoing repository. " + } + repositories: { + url: "https://github.com/microsoft/vision-longformer" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 114 + } methods: { name: "Linformer" full_name: "Linformer" @@ -23297,24 +23482,38 @@ pr_id_to_video: { authors: "Ruixiang Zhang" authors: "Josh Susskind" repositories: { - url: "https://github.com/rish-16/aft-pytorch" - owner: "rish-16" + url: "https://github.com/ShenDezhou/aft-pytorch" + owner: "ShenDezhou" framework: FRAMEWORK_PYTORCH - number_of_stars: 145 - description: "Unofficial PyTorch implementation of Attention Free Transformer (AFT) layers by Apple Inc." + number_of_stars: 1 + description: "Unofficial PyTorch implementation of **Attention Free Transformer**'s layers by [Zhai](https://twitter.com/zhaisf?lang=en), et al. 
[[abs](https://openreview.net/forum?id=pW--cu2FCHY), [pdf](https://arxiv.org/pdf/2105.14103.pdf)] from Apple Inc." + } + repositories: { + url: "https://github.com/BlinkDL/RWKV-LM" + owner: "BlinkDL" + framework: FRAMEWORK_PYTORCH + number_of_stars: 30 + description: "The RWKV Language Model" } repositories: { url: "https://github.com/karynaur/Attention-Free-minGPT" owner: "karynaur" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + number_of_stars: 5 } repositories: { url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/aft" owner: "transformers" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + number_of_stars: 3490 + description: "🧑‍🏫 Implementations/tutorials of deep learning papers with side-by-side notes 📝; including transformers (original, xl, switch, feedback, vit), optimizers (adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), 🎮 reinforcement learning (ppo, dqn), capsnet, distillation, etc. 🧠" + } + repositories: { + url: "https://github.com/rish-16/aft-pytorch" + owner: "rish-16" + framework: FRAMEWORK_PYTORCH + number_of_stars: 151 + description: "Unofficial PyTorch implementation of Attention Free Transformer (AFT) layers by Apple Inc." } methods: { name: "Residual Connection" @@ -23371,7 +23570,7 @@ pr_id_to_video: { video_id: "alIgXKHbZII" video_title: "PR-230: Reformer: The Efficient Transformer" number_of_likes: 13 - number_of_views: 798 + number_of_views: 832 published_date: { seconds: 1584072405 } @@ -23396,71 +23595,72 @@ pr_id_to_video: { authors: "Mohammad Norouzi" authors: "Geoffrey Hinton" repositories: { - url: "https://github.com/vturrisi/solo-learn" - owner: "vturrisi" - framework: FRAMEWORK_PYTORCH - number_of_stars: 47 - description: "solo-learn: a library of self-supervised methods for visual representation learning powered by Pytorch Lightning" - } - repositories: { - url: "https://github.com/beresandras/contrastive-classification-keras" - owner: "beresandras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "Implementation of self-supervised image-level contrastive pretraining methods using Keras." + url: "https://github.com/Mayurji/SimCLR" + owner: "Mayurji" + framework: FRAMEWORK_OTHERS + description: "Self-Supervised Learning approach to learn contrasting representation between images." } repositories: { - url: "https://github.com/bjader/QSSL" - owner: "bjader" + url: "https://github.com/sidwa/ae_thesis" + owner: "sidwa" framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "Code for Quantum Self-Supervised Learning" } repositories: { - url: "https://github.com/lightly-ai/lightly" - owner: "lightly-ai" + url: "https://github.com/wvangansbeke/Unsupervised-Classification" + owner: "wvangansbeke" framework: FRAMEWORK_PYTORCH - number_of_stars: 1064 - description: "A python library for self-supervised learning on images." + number_of_stars: 753 + description: "SCAN: Learning to Classify Images without Labels, incl. SimCLR. 
[ECCV 2020]" } repositories: { - url: "https://github.com/liuch37/image-processing" - owner: "liuch37" + url: "https://github.com/AndrewAtanov/simclr-pytorch" + owner: "AndrewAtanov" framework: FRAMEWORK_PYTORCH - description: "Implement several common while useful image processing methods listed in the literature." + number_of_stars: 45 + description: "PyTorch implementation of SimCLR: supports multi-GPU training and closely reproduces results" } repositories: { - url: "https://github.com/facebookresearch/vissl" - owner: "facebookresearch" + url: "https://github.com/sthalles/SimCLR" + owner: "sthalles" framework: FRAMEWORK_PYTORCH - number_of_stars: 1788 - description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." + number_of_stars: 972 + description: "PyTorch implementation of SimCLR: A Simple Framework for Contrastive Learning of Visual Representations" } repositories: { - url: "https://github.com/Daniel-H-99/SimCLR" - owner: "Daniel-H-99" + url: "https://github.com/spijkervet/simclr" + owner: "spijkervet" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 368 + description: "PyTorch implementation of SimCLR: A Simple Framework for Contrastive Learning of Visual Representations by T. Chen et al." } repositories: { - url: "https://github.com/cjrd/self-supervised-pretraining" - owner: "cjrd" + url: "https://github.com/HobbitLong/SupContrast" + owner: "HobbitLong" framework: FRAMEWORK_PYTORCH - number_of_stars: 29 - description: "Repository providing a wide range of self-supervised pretrained models for computer vision tasks." + number_of_stars: 1174 + description: "PyTorch implementation of \"Supervised Contrastive Learning\" (and SimCLR incidentally)" } repositories: { - url: "https://github.com/edureisMD/ConVIRT-pytorch" - owner: "edureisMD" + url: "https://github.com/sayakpaul/SimCLR-in-TensorFlow-2" + owner: "sayakpaul" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 65 + description: "(Minimally) implements SimCLR (https://arxiv.org/abs/2002.05709) in TensorFlow 2." + } + repositories: { + url: "https://github.com/reppy4620/SimCLR4Paint" + owner: "reppy4620" framework: FRAMEWORK_PYTORCH - number_of_stars: 29 - description: "Contrastive Learning Representations for Images and Text Pairs. Pytorch implementation of ConVIRT Paper." + number_of_stars: 2 + description: "A simple PyTorch implementation of SimCLR using illustrations as dataset." } repositories: { - url: "https://github.com/Liut2016/ecg-supcontrast" - owner: "Liut2016" + url: "https://github.com/AidenDurrant/MoCo-Pytorch" + owner: "AidenDurrant" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 37 + description: "An unofficial Pytorch implementation of \"Improved Baselines with Momentum Contrastive Learning\" (MoCoV2) - X. Chen, et al." 
} methods: { name: "ResNet" @@ -23555,7 +23755,7 @@ pr_id_to_video: { url: "https://github.com/salesforce/MoPro" owner: "salesforce" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 + number_of_stars: 46 description: "MoPro: Webly Supervised Learning" } } @@ -23574,7 +23774,7 @@ pr_id_to_video: { url: "https://github.com/yixinL7/SimCLS" owner: "yixinL7" framework: FRAMEWORK_PYTORCH - number_of_stars: 30 + number_of_stars: 50 description: "Code for our paper \"SimCLS: A Simple Framework for Contrastive Learning of Abstractive Summarization\", ACL 2021" } methods: { @@ -23691,65 +23891,65 @@ pr_id_to_video: { url: "https://github.com/PyTorchLightning/pytorch-lightning" owner: "PyTorchLightning" framework: FRAMEWORK_PYTORCH - number_of_stars: 14508 + number_of_stars: 15003 description: "The lightweight PyTorch wrapper for high-performance AI research. Scale your models, not the boilerplate." } methods: { - name: "ResNet" - full_name: "Residual Network" - description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." + name: "Random Gaussian Blur" + full_name: "Random Gaussian Blur" + description: "**Random Gaussian Blur** is an image data augmentation technique where we randomly blur the image using a Gaussian distribution.\r\n\r\nImage Source: [Wikipedia](https://en.wikipedia.org/wiki/Gaussian_blur)" } methods: { - name: "MoCo" - full_name: "Momentum Contrast" - description: "**MoCo**, or **Momentum Contrast**, is a self-supervised learning algorithm with a contrastive loss. \r\n\r\nContrastive loss methods can be thought of as building dynamic dictionaries. The \"keys\" (tokens) in the dictionary are sampled from data (e.g., images or patches) and are represented by an encoder network. Unsupervised learning trains encoders to perform dictionary look-up: an encoded “query” should be similar to its matching key and dissimilar to others. Learning is formulated as minimizing a contrastive loss. \r\n\r\nMoCo can be viewed as a way to build large and consistent dictionaries for unsupervised learning with a contrastive loss. In MoCo, we maintain the dictionary as a queue of data samples: the encoded representations of the current mini-batch are enqueued, and the oldest are dequeued. The queue decouples the dictionary size from the mini-batch size, allowing it to be large. Moreover, as the dictionary keys come from the preceding several mini-batches, a slowly progressing key encoder, implemented as a momentum-based moving average of the query encoder, is proposed to maintain consistency." + name: "NT-Xent" + full_name: "Normalized Temperature-scaled Cross Entropy Loss" + description: "**NT-Xent**, or **Normalized Temperature-scaled Cross Entropy Loss**, is a loss function. 
Let $\\text{sim}\\left(\\mathbf{u}, \\mathbf{v}\\right) = \\mathbf{u}^{T}\\mathbf{v}/||\\mathbf{u}|| ||\\mathbf{v}||$ denote the cosine similarity between two vectors $\\mathbf{u}$ and $\\mathbf{v}$. Then the loss function for a positive pair of examples $\\left(i, j\\right)$ is :\r\n\r\n$$ \\mathbb{l}\\_{i,j} = -\\log\\frac{\\exp\\left(\\text{sim}\\left(\\mathbf{z}\\_{i}, \\mathbf{z}\\_{j}\\right)/\\tau\\right)}{\\sum^{2N}\\_{k=1}\\mathcal{1}\\_{[k\\neq{i}]}\\exp\\left(\\text{sim}\\left(\\mathbf{z}\\_{i}, \\mathbf{z}\\_{k}\\right)/\\tau\\right)}$$\r\n\r\nwhere $\\mathcal{1}\\_{[k\\neq{i}]} \\in ${$0, 1$} is an indicator function evaluating to $1$ iff $k\\neq{i}$ and $\\tau$ denotes a temperature parameter. The final loss is computed across all positive pairs, both $\\left(i, j\\right)$ and $\\left(j, i\\right)$, in a mini-batch.\r\n\r\nSource: [SimCLR](https://paperswithcode.com/method/simclr#)" } methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" } methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. 
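As a concrete reading of the NT-Xent formula above, here is a minimal NumPy sketch; the names are illustrative and not taken from any repository in this diff. Rows `i` and `i + N` of `z` are assumed to hold the two augmented views of example `i`.

```python
import numpy as np

def nt_xent(z, tau=0.5):
    """NT-Xent over 2N projections; rows i and i+N form the positive pair."""
    z = z / np.linalg.norm(z, axis=1, keepdims=True)  # unit norm -> dot = cosine sim
    two_n = z.shape[0]
    sim = (z @ z.T) / tau                             # sim(z_i, z_k) / tau
    np.fill_diagonal(sim, -np.inf)                    # indicator 1[k != i]
    pos = (np.arange(two_n) + two_n // 2) % two_n     # each row's positive partner
    m = sim.max(axis=1, keepdims=True)                # stable log-sum-exp
    lse = m[:, 0] + np.log(np.exp(sim - m).sum(axis=1))
    return float(np.mean(lse - sim[np.arange(two_n), pos]))

loss = nt_xent(np.random.randn(8, 128))               # 2N = 8 random embeddings
```

Averaging over all 2N rows covers both orderings of each positive pair, matching the "both (i, j) and (j, i)" clause in the definition.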
The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." } methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" } methods: { - name: "InfoNCE" - full_name: "InfoNCE" - description: "**InfoNCE**, where NCE stands for Noise-Contrastive Estimation, is a type of contrastive loss function used for [self-supervised learning](https://paperswithcode.com/methods/category/self-supervised-learning).\r\n\r\nGiven a set $X = ${$x\\_{1}, \\dots, x\\_{N}$} of $N$ random samples containing one positive sample from $p\\left(x\\_{t+k}|c\\_{t}\\right)$ and $N − 1$ negative samples from the 'proposal' distribution $p\\left(x\\_{t+k}\\right)$, we optimize:\r\n\r\n$$ \\mathcal{L}\\_{N} = - \\mathbb{E}\\_{X}\\left[\\log\\frac{f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right)}{\\sum\\_{x\\_{j}\\in{X}}f\\_{k}\\left(x\\_{j}, c\\_{t}\\right)}\\right] $$\r\n\r\nOptimizing this loss will result in $f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right)$ estimating the density ratio, which is:\r\n\r\n$$ f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right) \\propto \\frac{p\\left(x\\_{t+k}|c\\_{t}\\right)}{p\\left(x\\_{t+k}\\right)} $$" + name: "SimCLR" + full_name: "SimCLR" + description: "**SimCLR** is a framework for contrastive learning of visual representations. 
It learns representations by maximizing agreement between differently augmented views of the same data example via a contrastive loss in the latent space. It consists of:\r\n\r\n- A stochastic data augmentation module that transforms any given data example randomly resulting in two correlated views of the same example, denoted $\\mathbf{\\tilde{x}\\_{i}}$ and $\\mathbf{\\tilde{x}\\_{j}}$, which is considered a positive pair. SimCLR sequentially applies three simple augmentations: random cropping followed by resize back to the original size, random color distortions, and random Gaussian blur. The authors find random crop and color distortion is crucial to achieve good performance.\r\n\r\n- A neural network base encoder $f\\left(·\\right)$ that extracts representation vectors from augmented data examples. The framework allows various choices of the network architecture without any constraints. The authors opt for simplicity and adopt ResNet to obtain $h\\_{i} = f\\left(\\mathbf{\\tilde{x}}\\_{i}\\right) = \\text{ResNet}\\left(\\mathbf{\\tilde{x}}\\_{i}\\right)$ where $h\\_{i} \\in \\mathbb{R}^{d}$ is the output after the average pooling layer.\r\n\r\n- A small neural network projection head $g\\left(·\\right)$ that maps representations to the space where contrastive loss is applied. Authors use a MLP with one hidden layer to obtain $z\\_{i} = g\\left(h\\_{i}\\right) = W^{(2)}\\sigma\\left(W^{(1)}h\\_{i}\\right)$ where $\\sigma$ is a ReLU nonlinearity. The authors find it beneficial to define the contrastive loss on $z\\_{i}$’s rather than $h\\_{i}$’s.\r\n\r\n- A contrastive loss function defined for a contrastive prediction task. Given a set {$\\mathbf{\\tilde{x}}\\_{k}$} including a positive pair of examples $\\mathbf{\\tilde{x}}\\_{i}$ and $\\mathbf{\\tilde{x}\\_{j}}$ , the contrastive prediction task aims to identify $\\mathbf{\\tilde{x}}\\_{j}$ in {$\\mathbf{\\tilde{x}}\\_{k}$}$\\_{k\\neq{i}}$ for a given $\\mathbf{\\tilde{x}}\\_{i}$.\r\n\r\nA minibatch of $N$ examples is randomly sampled and the contrastive prediction task is defined on pairs of augmented examples derived from the minibatch, resulting in $2N$ data points. Negative examples are not sampled explicitly. Instead, given a positive pair, the other $2(N − 1)$ augmented examples within a minibatch are treated as negative examples. A NT-Xent (the normalized\r\ntemperature-scaled cross entropy loss) loss function is used (see components)." } methods: { - name: "ColorJitter" - full_name: "Color Jitter" - description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. 
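Putting the four SimCLR components above together, a schematic training step might look as follows. Everything here is a stand-in sketch: `augment` is a placeholder for the crop/color-distortion/blur pipeline, the encoder and projection head are toy linear maps, and `nt_xent` is the loss sketched after the NT-Xent formula above; none of it is code from the repositories listed in this diff.

```python
import numpy as np

rng = np.random.default_rng(0)

def augment(x):                      # placeholder for crop + color distortion + blur
    return x + 0.1 * rng.standard_normal(x.shape)

W_f = 0.05 * rng.standard_normal((784, 256))          # base encoder f(.)
W1 = 0.1 * rng.standard_normal((256, 128))            # projection head g(.)
W2 = 0.1 * rng.standard_normal((128, 64))

def f(x):
    return np.maximum(0, x @ W_f)                     # h_i = f(x_i), ReLU stub

def g(h):
    return np.maximum(0, h @ W1) @ W2                 # z_i = W2 sigma(W1 h_i)

batch = rng.standard_normal((16, 784))                # N = 16 examples
views = np.concatenate([augment(batch), augment(batch)])  # 2N correlated views
z = g(f(views))
loss = nt_xent(z)        # loss defined on the z_i's, not the h_i's, as noted above
```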
It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." } methods: { - name: "Feedforward Network" - full_name: "Feedforward Network" - description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. 
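The initialization rule above, $w_{l} \sim \mathcal{N}\left(0, 2/n_{l}\right)$, is only a couple of lines in practice. A minimal sketch, assuming the common convention that $n_{l}$ is the layer's fan-in:

```python
import numpy as np

def kaiming_init(fan_in, fan_out, rng=np.random.default_rng()):
    """He initialization for a ReLU layer: W ~ N(0, 2 / n_l), biases at 0."""
    std = np.sqrt(2.0 / fan_in)                # n_l taken as the layer fan-in
    W = rng.normal(0.0, std, size=(fan_in, fan_out))
    return W, np.zeros(fan_out)

W, b = kaiming_init(256, 128)                  # one 256 -> 128 ReLU layer
```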
Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + name: "Random Resized Crop" + full_name: "Random Resized Crop" + description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" } } video: { video_id: "FWhM3juUM6s" video_title: "PR-231: A Simple Framework for Contrastive Learning of Visual Representations" - number_of_likes: 97 - number_of_views: 5914 + number_of_likes: 102 + number_of_views: 6218 published_date: { seconds: 1583683713 } @@ -23773,20 +23973,20 @@ pr_id_to_video: { authors: "Chen Liang" authors: "David R. So" authors: "Quoc V. Le" + repositories: { + url: "https://github.com/KnollFrank/automl_zero" + owner: "KnollFrank" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + } repositories: { is_official: true url: "https://github.com/google-research/google-research/tree/master/automl_zero" owner: "master" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18403 + number_of_stars: 18790 description: "Google Research" } - repositories: { - url: "https://github.com/KnollFrank/automl_zero" - owner: "KnollFrank" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 - } methods: { name: "AutoML-Zero" full_name: "AutoML-Zero" @@ -23810,11 +24010,10 @@ pr_id_to_video: { authors: "Honglak Lee" authors: "Aleksandra Faust" repositories: { - url: "https://github.com/AurelianTactics/dqnclipped_dqnreg_prelim_implementation" - owner: "AurelianTactics" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Implementing DQNClipped and DQNReg Algorithms" + url: "https://github.com/jjgarau/DAGPolicyGradient" + owner: "jjgarau" + framework: FRAMEWORK_OTHERS + number_of_stars: 5 } repositories: { is_official: true @@ -23825,10 +24024,11 @@ pr_id_to_video: { description: "Supplementary Data for Evolving Reinforcement Learning Algorithms" } repositories: { - url: "https://github.com/jjgarau/DAGPolicyGradient" - owner: "jjgarau" - framework: FRAMEWORK_OTHERS - number_of_stars: 5 + url: "https://github.com/AurelianTactics/dqnclipped_dqnreg_prelim_implementation" + owner: "AurelianTactics" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + description: "Implementing DQNClipped and DQNReg Algorithms" } methods: { name: "Convolution" @@ -23863,12 +24063,6 @@ pr_id_to_video: { authors: "Andrew Brock" authors: "Karen Simonyan" authors: "Quoc V. Le" - repositories: { - url: "https://github.com/mnikitin/EvoNorm" - owner: "mnikitin" - framework: FRAMEWORK_OTHERS - description: "Gluon implementation of EvoNorm" - } repositories: { url: "https://github.com/sayakpaul/EvoNorms-in-TensorFlow-2" owner: "sayakpaul" @@ -23876,11 +24070,17 @@ pr_id_to_video: { number_of_stars: 10 description: "Implements EvoNorms B0 and S0 as proposed in Evolving Normalization-Activation Layers." 
} + repositories: { + url: "https://github.com/mnikitin/EvoNorm" + owner: "mnikitin" + framework: FRAMEWORK_OTHERS + description: "Gluon implementation of EvoNorm" + } repositories: { url: "https://github.com/lonePatient/EvoNorms_PyTorch" owner: "lonePatient" framework: FRAMEWORK_PYTORCH - number_of_stars: 17 + number_of_stars: 16 description: "Evolving Normalization-Activation Layers" } repositories: { @@ -23894,7 +24094,7 @@ pr_id_to_video: { url: "https://github.com/wandb/gallery" owner: "wandb" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 186 + number_of_stars: 190 description: "This is a collection of the code that accompanies the reports in The Gallery by Weights & Biases." } repositories: { @@ -23908,14 +24108,14 @@ pr_id_to_video: { url: "https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } repositories: { url: "https://github.com/wandb/awesome-dl-projects" owner: "wandb" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 186 + number_of_stars: 190 description: "This is a collection of the code that accompanies the reports in The Gallery by Weights & Biases." } methods: { @@ -23989,7 +24189,7 @@ pr_id_to_video: { url: "https://github.com/PerdonLiu/CSE-Autoloss" owner: "PerdonLiu" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 + number_of_stars: 47 description: "Seach Losses of our paper 'Loss Function Discovery for Object Detection via Convergence-Simulation Driven Search', accepted by ICLR 2021." } } @@ -24056,21 +24256,21 @@ pr_id_to_video: { authors: "Xin He" authors: "Kaiyong Zhao" authors: "Xiaowen Chu" - repositories: { - url: "https://github.com/daya6489/DriveML" - owner: "daya6489" - framework: FRAMEWORK_OTHERS - number_of_stars: 8 - description: "Self-Drive Machine Learning Projects" - } repositories: { is_official: true url: "https://github.com/marsggbo/automl_a_survey_of_state_of_the_art" owner: "marsggbo" framework: FRAMEWORK_OTHERS - number_of_stars: 11 + number_of_stars: 12 description: "AutoML: A Survey of State-of-the-Art" } + repositories: { + url: "https://github.com/daya6489/DriveML" + owner: "daya6489" + framework: FRAMEWORK_OTHERS + number_of_stars: 9 + description: "Self-Drive Machine Learning Projects" + } methods: { name: "Tanh Activation" full_name: "Tanh Activation" @@ -24096,7 +24296,7 @@ pr_id_to_video: { video_id: "J__uJ79m01Q" video_title: "PR-232: AutoML-Zero:Evolving Machine Learning Algorithms From Scratch" number_of_likes: 26 - number_of_views: 1459 + number_of_views: 1490 published_date: { seconds: 1584287452 } @@ -24139,7 +24339,7 @@ pr_id_to_video: { url: "https://github.com/andravin/algebranets" owner: "andravin" framework: FRAMEWORK_OTHERS - number_of_stars: 12 + number_of_stars: 14 description: "Unofficial Experiments with AlgebraNets" } } @@ -24164,6 +24364,14 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH number_of_stars: 1 } + repositories: { + is_official: true + url: "https://github.com/google/uncertainty-baselines" + owner: "google" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 514 + description: "High-quality implementations of standard and SOTA methods on a variety of tasks." 
+ } methods: { name: "SGD" full_name: "Stochastic Gradient Descent" @@ -24234,7 +24442,7 @@ pr_id_to_video: { video_id: "45-7XgJeI-E" video_title: "PR-233: Multiplicative Interactions and Where To Find Them" number_of_likes: 6 - number_of_views: 500 + number_of_views: 501 published_date: { seconds: 1584350201 } @@ -24257,22 +24465,11 @@ pr_id_to_video: { authors: "Assaf Shocher" authors: "Nadav Cohen" authors: "Michal Irani" - repositories: { - url: "https://github.com/Weifeng73/Zero-Shot-Super-resolution" - owner: "Weifeng73" - framework: FRAMEWORK_OTHERS - description: "Computer Vision Course 2019 Final Project in ZJU " - } - repositories: { - url: "https://github.com/mohit1997/ZSSR" - owner: "mohit1997" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/assafshocher/ZSSR" owner: "assafshocher" framework: FRAMEWORK_OTHERS - number_of_stars: 322 + number_of_stars: 327 description: "\"Zero-Shot\" Super-Resolution using Deep Internal Learning" } repositories: { @@ -24296,6 +24493,17 @@ pr_id_to_video: { number_of_stars: 6 description: "The Keras Re-implementation of the awesome “Zero-Shot” Super-Resolution using Deep Internal Learning" } + repositories: { + url: "https://github.com/mohit1997/ZSSR" + owner: "mohit1997" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/Weifeng73/Zero-Shot-Super-resolution" + owner: "Weifeng73" + framework: FRAMEWORK_OTHERS + description: "Computer Vision Course 2019 Final Project in ZJU " + } } papers: { paper_id: "across-scales-across-dimensions-temporal" @@ -24315,7 +24523,7 @@ pr_id_to_video: { url: "https://github.com/eyalnaor/DeepTemporalSR" owner: "eyalnaor" framework: FRAMEWORK_PYTORCH - number_of_stars: 24 + number_of_stars: 33 description: "Across Scales & Across Dimensions - Temporal Super-Resolution using Deep Internal Learning" } } @@ -24331,18 +24539,18 @@ pr_id_to_video: { authors: "Sunwoo Cho" authors: "Nam Ik Cho" repositories: { - is_official: true - url: "https://github.com/JWSoh/MZSR" + url: "https://github.com/JWSoh/MainSR" owner: "JWSoh" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 208 - description: "Meta-Transfer Learning for Zero-Shot Super-Resolution (CVPR, 2020)" + number_of_stars: 5 } repositories: { - url: "https://github.com/JWSoh/MainSR" + is_official: true + url: "https://github.com/JWSoh/MZSR" owner: "JWSoh" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 + number_of_stars: 213 + description: "Meta-Transfer Learning for Zero-Shot Super-Resolution (CVPR, 2020)" } } papers: { @@ -24357,28 +24565,29 @@ pr_id_to_video: { authors: "Assaf Shocher" authors: "Michal Irani" repositories: { - url: "https://github.com/RomanovIgnat/KernelGAN" - owner: "RomanovIgnat" + url: "https://github.com/BillOuyang/KernelGan-s3" + owner: "BillOuyang" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } repositories: { - url: "https://github.com/Lornatang/KernelGAN" - owner: "Lornatang" + is_official: true + url: "https://github.com/sefibk/KernelGAN" + owner: "sefibk" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 247 } repositories: { - url: "https://github.com/BillOuyang/KernelGan-s3" - owner: "BillOuyang" + url: "https://github.com/Lornatang/KernelGAN" + owner: "Lornatang" framework: FRAMEWORK_PYTORCH number_of_stars: 1 } repositories: { - is_official: true - url: "https://github.com/sefibk/KernelGAN" - owner: "sefibk" + url: "https://github.com/RomanovIgnat/KernelGAN" + owner: "RomanovIgnat" framework: FRAMEWORK_PYTORCH - number_of_stars: 242 + number_of_stars: 1 
} } papers: { @@ -24399,7 +24608,7 @@ pr_id_to_video: { url: "https://github.com/parkseobin/MLSR" owner: "parkseobin" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 42 + number_of_stars: 45 description: "Source code for ECCV2020 \"Fast Adaptation to Super-Resolution Networks via Meta-Learning\"" } } @@ -24430,8 +24639,8 @@ pr_id_to_video: { video: { video_id: "WwuHjgTwCR0" video_title: "PR-234: Zero-Shot Super-Resolution using Deep Internal Learning" - number_of_likes: 22 - number_of_views: 1530 + number_of_likes: 26 + number_of_views: 1574 published_date: { seconds: 1584884633 } @@ -24555,19 +24764,19 @@ pr_id_to_video: { authors: "Xiaoyun Wang" authors: "John D. Owens" authors: "Yixuan Li" - repositories: { - url: "https://github.com/omegafragger/DDU" - owner: "omegafragger" - framework: FRAMEWORK_PYTORCH - number_of_stars: 22 - description: "Code for Deterministic Neural Networks with Appropriate Inductive Biases Capture Epistemic and Aleatoric Uncertainty" - } repositories: { is_official: true url: "https://github.com/wetliu/energy_ood" owner: "wetliu" framework: FRAMEWORK_PYTORCH - number_of_stars: 176 + number_of_stars: 179 + } + repositories: { + url: "https://github.com/omegafragger/DDU" + owner: "omegafragger" + framework: FRAMEWORK_PYTORCH + number_of_stars: 24 + description: "Code for Deterministic Neural Networks with Appropriate Inductive Biases Capture Epistemic and Aleatoric Uncertainty" } methods: { name: "WideResNet" @@ -24683,7 +24892,7 @@ pr_id_to_video: { video_id: "eGU3vvBhkck" video_title: "PR-235: Input Complexity and Out-of-distribution Detection with Likelihood-based Generative Models" number_of_likes: 16 - number_of_views: 847 + number_of_views: 861 published_date: { seconds: 1584887384 } @@ -24710,53 +24919,46 @@ pr_id_to_video: { authors: "Thomas S. Huang" authors: "Lei Zhang" repositories: { - url: "https://github.com/ducongju/HRNet" - owner: "ducongju" - framework: FRAMEWORK_PYTORCH - description: "The project is an official implementation of our CVPR2019 paper \"Deep High-Resolution Representation Learning for Human Pose Estimation\"" - } - repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" + url: "https://github.com/xn1997/HigherHRnet" + owner: "xn1997" framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." 
+ number_of_stars: 1 + description: "HigherHRnet注释+tensorrt转换" } repositories: { - url: "https://github.com/Darius-Liesis/HRNet-works" - owner: "Darius-Liesis" + url: "https://github.com/laowang666888/HRNET" + owner: "laowang666888" framework: FRAMEWORK_PYTORCH - description: "Practice with HRNet_Human-Pose-Estimation" } repositories: { - url: "https://github.com/anshky/HR-NET" - owner: "anshky" + url: "https://github.com/AlongRide/Py3torch_HigherHRNet" + owner: "AlongRide" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } repositories: { - url: "https://github.com/abhi1kumar/hrnet_pose_single_gpu" - owner: "abhi1kumar" + url: "https://github.com/luohuan2uestc/pose-multi" + owner: "luohuan2uestc" framework: FRAMEWORK_PYTORCH - description: "Forked from original HR-Net Pose but made to run to single GPU" + number_of_stars: 1 } repositories: { - url: "https://github.com/visionNoob/hrnet_pytorch" - owner: "visionNoob" + is_official: true + url: "https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation" + owner: "HRNet" framework: FRAMEWORK_PYTORCH + number_of_stars: 886 + description: "This is an official implementation of our CVPR 2020 paper \"HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation\" (https://arxiv.org/abs/1908.10357)" } repositories: { - url: "https://github.com/wsjzha/deep-high-resolution-net.pytorch" - owner: "wsjzha" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/gox-ai/hrnet-pose-api" - owner: "gox-ai" + url: "https://github.com/baoshengyu/deep-high-resolution-net.pytorch" + owner: "baoshengyu" framework: FRAMEWORK_PYTORCH + description: "The project is an official implementation of our CVPR2019 paper \"Deep High-Resolution Representation Learning for Human Pose Estimation\"" } repositories: { - url: "https://github.com/LiuShenLan/HRNet" - owner: "LiuShenLan" + url: "https://github.com/sdll/hrnet-pose-estimation" + owner: "sdll" framework: FRAMEWORK_PYTORCH } repositories: { @@ -24766,173 +24968,183 @@ pr_id_to_video: { number_of_stars: 1 description: "The project is a modified implementation of CVPR2019 paper \"Deep High-Resolution Representation Learning for Human Pose Estimation\". It has been changed to train the model on standard dataset as well as drone captured dataset to help in search and rescue operation in Natural Disaster like situation." } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. 
This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "HRNet" - full_name: "HRNet" - description: "**HRNet**, or **High-Resolution Net**, is a general purpose convolutional neural network for tasks like semantic segmentation, object detection and image classification. It is able to maintain high resolution representations through the whole process. We start from a high-resolution convolution stream, gradually add high-to-low resolution convolution streams one by one, and connect the multi-resolution streams in parallel. The resulting network consists of several ($4$ in the paper) stages and\r\nthe $n$th stage contains $n$ streams corresponding to $n$ resolutions. The authors conduct repeated multi-resolution fusions by exchanging the information across the parallel streams over and over." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - } - papers: { - paper_id: "scalenas-one-shot-learning-of-scale-aware" - title: "ScaleNAS: One-Shot Learning of Scale-Aware Representations for Visual Recognition" - arxiv_id: "2011.14584" - abstract: "Scale variance among different sizes of body parts and objects is a challenging problem for visual recognition tasks. Existing works usually design dedicated backbone or apply Neural architecture Search(NAS) for each task to tackle this challenge. However, existing works impose significant limitations on the design or search space. 
To solve these problems, we present ScaleNAS, a one-shot learning method for exploring scale-aware representations. ScaleNAS solves multiple tasks at a time by searching multi-scale feature aggregation. ScaleNAS adopts a flexible search space that allows an arbitrary number of blocks and cross-scale feature fusions. To cope with the high search cost incurred by the flexible space, ScaleNAS employs one-shot learning for multi-scale supernet driven by grouped sampling and evolutionary search. Without further retraining, ScaleNet can be directly deployed for different visual recognition tasks with superior performance. We use ScaleNAS to create high-resolution models for two different tasks, ScaleNet-P for human pose estimation and ScaleNet-S for semantic segmentation. ScaleNet-P and ScaleNet-S outperform existing manually crafted and NAS-based methods in both tasks. When applying ScaleNet-P to bottom-up human pose estimation, it surpasses the state-of-the-art HigherHRNet. In particular, ScaleNet-P4 achieves 71.6% AP on COCO test-dev, achieving new state-of-the-art result." - published_date: { - seconds: 1606694400 - } - authors: "Hsin-Pai Cheng" - authors: "Feng Liang" - authors: "Meng Li" - authors: "Bowen Cheng" - authors: "Feng Yan" - authors: "Hai Li" - authors: "Vikas Chandra" - authors: "Yiran Chen" - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. 
Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." - } - methods: { - name: "Scale Aggregation Block" - full_name: "Scale Aggregation Block" - description: "A **Scale Aggregation Block** concatenates feature maps at a wide range of scales. Feature maps for each scale are generated by a stack of downsampling, convolution and upsampling operations. The proposed scale aggregation block is a standard computational module which readily replaces any given transformation $\\mathbf{Y}=\\mathbf{T}(\\mathbf{X})$, where $\\mathbf{X}\\in \\mathbb{R}^{H\\times W\\times C}$, $\\mathbf{Y}\\in \\mathbb{R}^{H\\times W\\times C_o}$ with $C$ and $C_o$ being the input and output channel number respectively. $\\mathbf{T}$ is any operator such as a convolution layer or a series of convolution layers. Assume we have $L$ scales. Each scale $l$ is generated by sequentially conducting a downsampling $\\mathbf{D}_l$, a transformation $\\mathbf{T}_l$ and an unsampling operator $\\mathbf{U}_l$:\r\n\r\n$$\r\n\\mathbf{X}^{'}_l=\\mathbf{D}_l(\\mathbf{X}),\r\n\\label{eq:eq_d}\r\n$$\r\n\r\n$$\r\n\\mathbf{Y}^{'}_l=\\mathbf{T}_l(\\mathbf{X}^{'}_l),\r\n\\label{eq:eq_tl}\r\n$$\r\n\r\n$$\r\n\\mathbf{Y}_l=\\mathbf{U}_l(\\mathbf{Y}^{'}_l),\r\n\\label{eq:eq_u}\r\n$$\r\n\r\nwhere $\\mathbf{X}^{'}_l\\in \\mathbb{R}^{H_l\\times W_l\\times C}$,\r\n$\\mathbf{Y}^{'}_l\\in \\mathbb{R}^{H_l\\times W_l\\times C_l}$, and\r\n$\\mathbf{Y}_l\\in \\mathbb{R}^{H\\times W\\times C_l}$.\r\nNotably, $\\mathbf{T}_l$ has the similar structure as $\\mathbf{T}$.\r\nWe can concatenate all $L$ scales together, getting\r\n\r\n$$\r\n\\mathbf{Y}^{'}=\\Vert^L_1\\mathbf{U}_l(\\mathbf{T}_l(\\mathbf{D}_l(\\mathbf{X}))),\r\n\\label{eq:eq_all}\r\n$$\r\n\r\nwhere $\\Vert$ indicates concatenating feature maps along the channel dimension, and $\\mathbf{Y}^{'} \\in \\mathbb{R}^{H\\times W\\times \\sum^L_1 C_l}$ is the final output feature maps of the scale aggregation block.\r\n\r\nIn the reference implementation, the downsampling $\\mathbf{D}_l$ with factor $s$ is implemented by a max pool layer with $s\\times s$ kernel size and $s$ stride. The upsampling $\\mathbf{U}_l$ is implemented by resizing with the nearest neighbor interpolation." - } - methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" - } - methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" - } - methods: { - name: "ScaleNet" - full_name: "ScaleNet" - description: "**ScaleNet**, or a **Scale Aggregation Network**, is a type of convolutional neural network which learns a neuron allocation for aggregating multi-scale information in different building blocks of a deep network. The most informative output neurons in each block are preserved while others are discarded, and thus neurons for multiple scales are competitively and adaptively allocated. The scale aggregation (SA) block concatenates feature maps at a wide range of scales. Feature maps for each scale are generated by a stack of downsampling, convolution and upsampling operations." - } - methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" - } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } - } - papers: { - paper_id: "bottom-up-human-pose-estimation-via" - title: "Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression" - arxiv_id: "2104.02300" - abstract: "In this paper, we are interested in the bottom-up paradigm of estimating human poses from an image. We study the dense keypoint regression framework that is previously inferior to the keypoint detection and grouping framework. Our motivation is that regressing keypoint positions accurately needs to learn representations that focus on the keypoint regions. We present a simple yet effective approach, named disentangled keypoint regression (DEKR). We adopt adaptive convolutions through pixel-wise spatial transformer to activate the pixels in the keypoint regions and accordingly learn representations from them. We use a multi-branch structure for separate regression: each branch learns a representation with dedicated adaptive convolutions and regresses one keypoint. 
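The softmax formula above is usually computed with a max-subtraction for numerical stability, an implementation detail the description leaves out. A minimal sketch:

```python
import numpy as np

def softmax(logits):
    """P(y = j | x) = exp(x^T w_j) / sum_k exp(x^T w_k), computed stably."""
    shifted = logits - logits.max(axis=-1, keepdims=True)  # exp never overflows
    e = np.exp(shifted)
    return e / e.sum(axis=-1, keepdims=True)

x = np.random.randn(4)                 # input vector x
W = np.random.randn(4, 3)              # weighting vectors w_j as columns
p = softmax(x @ W)                     # probabilities over K = 3 classes; p.sum() == 1
```

Subtracting the row maximum leaves the output unchanged (numerator and denominator are scaled by the same factor) while keeping every exponent non-positive.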
The resulting disentangled representations are able to attend to the keypoint regions, respectively, and thus the keypoint regression is spatially more accurate. We empirically show that the proposed direct regression method outperforms keypoint detection and grouping methods and achieves superior bottom-up pose estimation results on two benchmark datasets, COCO and CrowdPose. The code and models are available at https://github.com/HRNet/DEKR." - published_date: { - seconds: 1617667200 - } - authors: "Zigang Geng" - authors: "Ke Sun" - authors: "Bin Xiao" - authors: "Zhaoxiang Zhang" - authors: "Jingdong Wang" repositories: { - is_official: true - url: "https://github.com/HRNet/DEKR" - owner: "HRNet" - framework: FRAMEWORK_PYTORCH - number_of_stars: 141 - description: "This is an official implementation of our CVPR 2021 paper \"Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression\" (https://arxiv.org/abs/2104.02300)" - } - methods: { - name: "Spatial Transformer" - full_name: "Spatial Transformer" - description: "A **Spatial Transformer** is an image model block that explicitly allows the spatial manipulation of data within a convolutional neural network. It gives CNNs the ability to actively spatially transform feature maps, conditional on the feature map itself, without any extra training supervision or modification to the optimisation process. Unlike pooling layers, where the receptive fields are fixed and local, the spatial transformer module is a dynamic mechanism that can actively spatially transform an image (or a feature map) by producing an appropriate transformation for each input sample. The transformation is then performed on the entire feature map (non-locally) and can include scaling, cropping, rotations, as well as non-rigid deformations.\r\n\r\nThe architecture is shown in the Figure to the right. The input feature map $U$ is passed to a localisation network which regresses the transformation parameters $\\theta$. The regular spatial grid $G$ over $V$ is transformed to the sampling grid $T\\_{\\theta}\\left(G\\right)$, which is applied to $U$, producing the warped output feature map $V$. The combination of the localisation network and sampling mechanism defines a spatial transformer." - } - } - papers: { - paper_id: "multi-person-pose-regression-via-pose" - title: "SMPR: Single-Stage Multi-Person Pose Regression" - arxiv_id: "2006.15576" - abstract: "Existing multi-person pose estimators can be roughly divided into two-stage approaches (top-down and bottom-up approaches) and one-stage approaches. The two-stage methods either suffer high computational redundancy for additional person detectors or group keypoints heuristically after predicting all the instance-free keypoints. The recently proposed single-stage methods do not rely on the above two extra stages but have lower performance than the latest bottom-up approaches. In this work, a novel single-stage multi-person pose regression, termed SMPR, is presented. It follows the paradigm of dense prediction and predicts instance-aware keypoints from every location. Besides feature aggregation, we propose better strategies to define positive pose hypotheses for training which all play an important role in dense pose estimation. The network also learns the scores of estimated poses. The pose scoring strategy further improves the pose estimation performance by prioritizing superior poses during non-maximum suppression (NMS). 
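To make the grid-and-sample mechanics of the Spatial Transformer description concrete, here is a minimal single-channel affine warp. It is a sketch only: it uses nearest-neighbour sampling for brevity where the module would use differentiable bilinear sampling, and `theta` is hand-set here rather than regressed by a localisation network.

```python
import numpy as np

def affine_warp(U, theta):
    """Sample input U at the transformed grid T_theta(G) to produce V."""
    h, w = U.shape
    ys, xs = np.meshgrid(np.linspace(-1, 1, h), np.linspace(-1, 1, w), indexing="ij")
    grid = np.stack([xs.ravel(), ys.ravel(), np.ones(h * w)])  # homogeneous coords
    sx, sy = theta @ grid                                      # source coordinates
    ix = np.clip(np.round((sx + 1) * (w - 1) / 2), 0, w - 1).astype(int)
    iy = np.clip(np.round((sy + 1) * (h - 1) / 2), 0, h - 1).astype(int)
    return U[iy, ix].reshape(h, w)

U = np.random.randn(28, 28)
theta = np.array([[0.5, 0.0, 0.0],     # sample a half-scale grid: zoom in on the centre
                  [0.0, 0.5, 0.0]])
V = affine_warp(U, theta)
```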
We show that our method not only outperforms existing single-stage methods and but also be competitive with the latest bottom-up methods, with 70.2 AP and 77.5 AP75 on the COCO test-dev pose benchmark. Code is available at https://github.com/cmdi-dlut/SMPR." - published_date: { - seconds: 1593302400 - } - authors: "Junqi Lin" - authors: "Huixin Miao" - authors: "Junjie Cao" - authors: "Zhixun Su" - authors: "Risheng Liu" - repositories: { - is_official: true - url: "https://github.com/cmdi-dlut/SMPR" - owner: "cmdi-dlut" + url: "https://github.com/LiuShenLan/HRNet" + owner: "LiuShenLan" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 } - } - papers: { - paper_id: "how-to-train-your-robust-human-pose-estimator" - title: "AID: Pushing the Performance Boundary of Human Pose Estimation with Information Dropping Augmentation" - arxiv_id: "2008.07139" - abstract: "Both appearance cue and constraint cue are vital for human pose estimation. However, there is a tendency in most existing works to overfitting the former and overlook the latter. In this paper, we propose Augmentation by Information Dropping (AID) to verify and tackle this dilemma. Alone with AID as a prerequisite for effectively exploiting its potential, we propose customized training schedules, which are designed by analyzing the pattern of loss and performance in training process from the perspective of information supplying. In experiments, as a model-agnostic approach, AID promotes various state-of-the-art methods in both bottom-up and top-down paradigms with different input sizes, frameworks, backbones, training and testing sets. On popular COCO human pose estimation test set, AID consistently boosts the performance of different configurations by around 0.6 AP in top-down paradigm and up to 1.5 AP in bottom-up paradigm. On more challenging CrowdPose dataset, the improvement is more than 1.5 AP. As AID successfully pushes the performance boundary of human pose estimation problem by considerable margin and sets a new state-of-the-art, we hope AID to be a regular configuration for training human pose estimators. The source code will be publicly available for further research." - published_date: { - seconds: 1597622400 - } - authors: "Junjie Huang" - authors: "Zheng Zhu" - authors: "Guan Huang" - authors: "Dalong Du" repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" + url: "https://github.com/gox-ai/hrnet-pose-api" + owner: "gox-ai" framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." - } - repositories: { - is_official: true - url: "https://github.com/HuangJunJie2017/UDP-Pose" - owner: "HuangJunJie2017" - framework: FRAMEWORK_OTHERS - number_of_stars: 211 - description: " Official code of The Devil is in the Details: Delving into Unbiased Data Processing for Human Pose Estimation" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. 
Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + methods: { + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "HRNet" + full_name: "HRNet" + description: "**HRNet**, or **High-Resolution Net**, is a general purpose convolutional neural network for tasks like semantic segmentation, object detection and image classification. It is able to maintain high resolution representations through the whole process. We start from a high-resolution convolution stream, gradually add high-to-low resolution convolution streams one by one, and connect the multi-resolution streams in parallel. The resulting network consists of several ($4$ in the paper) stages and\r\nthe $n$th stage contains $n$ streams corresponding to $n$ resolutions. The authors conduct repeated multi-resolution fusions by exchanging the information across the parallel streams over and over." 
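The four batch-normalization equations above translate directly into a training-mode forward pass. A sketch (at inference time, running averages of $\mu$ and $\sigma^{2}$ would replace the per-batch statistics):

```python
import numpy as np

def batch_norm(x, gamma, beta, eps=1e-5):
    """y_i = gamma * x_hat_i + beta over a minibatch, per the equations above."""
    mu = x.mean(axis=0)                       # mu_B
    var = x.var(axis=0)                       # sigma^2_B
    x_hat = (x - mu) / np.sqrt(var + eps)     # normalize each feature
    return gamma * x_hat + beta               # learnable scale and shift

x = np.random.randn(64, 32)                   # minibatch of 64, 32 features
y = batch_norm(x, gamma=np.ones(32), beta=np.zeros(32))
```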
+ }
+ methods: {
+ name: "Convolution"
+ full_name: "Convolution"
+ description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)"
+ }
+ }
+ papers: {
+ paper_id: "scalenas-one-shot-learning-of-scale-aware"
+ title: "ScaleNAS: One-Shot Learning of Scale-Aware Representations for Visual Recognition"
+ arxiv_id: "2011.14584"
+ abstract: "Scale variance among different sizes of body parts and objects is a challenging problem for visual recognition tasks. Existing works usually design a dedicated backbone or apply Neural Architecture Search (NAS) for each task to tackle this challenge. However, existing works impose significant limitations on the design or search space. To solve these problems, we present ScaleNAS, a one-shot learning method for exploring scale-aware representations. ScaleNAS solves multiple tasks at a time by searching multi-scale feature aggregation. ScaleNAS adopts a flexible search space that allows an arbitrary number of blocks and cross-scale feature fusions. To cope with the high search cost incurred by the flexible space, ScaleNAS employs one-shot learning for a multi-scale supernet driven by grouped sampling and evolutionary search. Without further retraining, ScaleNet can be directly deployed for different visual recognition tasks with superior performance. We use ScaleNAS to create high-resolution models for two different tasks, ScaleNet-P for human pose estimation and ScaleNet-S for semantic segmentation. ScaleNet-P and ScaleNet-S outperform existing manually crafted and NAS-based methods in both tasks. When applying ScaleNet-P to bottom-up human pose estimation, it surpasses the state-of-the-art HigherHRNet. In particular, ScaleNet-P4 achieves 71.6% AP on COCO test-dev, achieving a new state-of-the-art result."
+ published_date: {
+ seconds: 1606694400
+ }
+ authors: "Hsin-Pai Cheng"
+ authors: "Feng Liang"
+ authors: "Meng Li"
+ authors: "Bowen Cheng"
+ authors: "Feng Yan"
+ authors: "Hai Li"
+ authors: "Vikas Chandra"
+ authors: "Yiran Chen"
+ methods: {
+ name: "Average Pooling"
+ full_name: "Average Pooling"
+ description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Scale Aggregation Block" + full_name: "Scale Aggregation Block" + description: "A **Scale Aggregation Block** concatenates feature maps at a wide range of scales. Feature maps for each scale are generated by a stack of downsampling, convolution and upsampling operations. The proposed scale aggregation block is a standard computational module which readily replaces any given transformation $\\mathbf{Y}=\\mathbf{T}(\\mathbf{X})$, where $\\mathbf{X}\\in \\mathbb{R}^{H\\times W\\times C}$, $\\mathbf{Y}\\in \\mathbb{R}^{H\\times W\\times C_o}$ with $C$ and $C_o$ being the input and output channel number respectively. $\\mathbf{T}$ is any operator such as a convolution layer or a series of convolution layers. Assume we have $L$ scales. 
Each scale $l$ is generated by sequentially conducting a downsampling $\\mathbf{D}_l$, a transformation $\\mathbf{T}_l$ and an upsampling operator $\\mathbf{U}_l$:\r\n\r\n$$\r\n\\mathbf{X}^{'}_l=\\mathbf{D}_l(\\mathbf{X}),\r\n\\label{eq:eq_d}\r\n$$\r\n\r\n$$\r\n\\mathbf{Y}^{'}_l=\\mathbf{T}_l(\\mathbf{X}^{'}_l),\r\n\\label{eq:eq_tl}\r\n$$\r\n\r\n$$\r\n\\mathbf{Y}_l=\\mathbf{U}_l(\\mathbf{Y}^{'}_l),\r\n\\label{eq:eq_u}\r\n$$\r\n\r\nwhere $\\mathbf{X}^{'}_l\\in \\mathbb{R}^{H_l\\times W_l\\times C}$,\r\n$\\mathbf{Y}^{'}_l\\in \\mathbb{R}^{H_l\\times W_l\\times C_l}$, and\r\n$\\mathbf{Y}_l\\in \\mathbb{R}^{H\\times W\\times C_l}$.\r\nNotably, $\\mathbf{T}_l$ has a similar structure to $\\mathbf{T}$.\r\nWe can concatenate all $L$ scales together, getting\r\n\r\n$$\r\n\\mathbf{Y}^{'}=\\Vert^L_1\\mathbf{U}_l(\\mathbf{T}_l(\\mathbf{D}_l(\\mathbf{X}))),\r\n\\label{eq:eq_all}\r\n$$\r\n\r\nwhere $\\Vert$ indicates concatenating feature maps along the channel dimension, and $\\mathbf{Y}^{'} \\in \\mathbb{R}^{H\\times W\\times \\sum^L_1 C_l}$ is the final output feature maps of the scale aggregation block.\r\n\r\nIn the reference implementation, the downsampling $\\mathbf{D}_l$ with factor $s$ is implemented by a max pool layer with $s\\times s$ kernel size and $s$ stride. The upsampling $\\mathbf{U}_l$ is implemented by resizing with the nearest neighbor interpolation."
+ }
+ methods: {
+ name: "Convolution"
+ full_name: "Convolution"
+ description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)"
+ }
+ methods: {
+ name: "Softmax"
+ full_name: "Softmax"
+ description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$"
+ }
+ methods: {
+ name: "Max Pooling"
+ full_name: "Max Pooling"
+ description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)"
+ }
+ methods: {
+ name: "ScaleNet"
+ full_name: "ScaleNet"
+ description: "**ScaleNet**, or a **Scale Aggregation Network**, is a type of convolutional neural network which learns a neuron allocation for aggregating multi-scale information in different building blocks of a deep network. The most informative output neurons in each block are preserved while others are discarded, and thus neurons for multiple scales are competitively and adaptively allocated. The scale aggregation (SA) block concatenates feature maps at a wide range of scales. 
Feature maps for each scale are generated by a stack of downsampling, convolution and upsampling operations." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" + } + } + papers: { + paper_id: "bottom-up-human-pose-estimation-via" + title: "Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression" + arxiv_id: "2104.02300" + abstract: "In this paper, we are interested in the bottom-up paradigm of estimating human poses from an image. We study the dense keypoint regression framework that is previously inferior to the keypoint detection and grouping framework. Our motivation is that regressing keypoint positions accurately needs to learn representations that focus on the keypoint regions. We present a simple yet effective approach, named disentangled keypoint regression (DEKR). We adopt adaptive convolutions through pixel-wise spatial transformer to activate the pixels in the keypoint regions and accordingly learn representations from them. We use a multi-branch structure for separate regression: each branch learns a representation with dedicated adaptive convolutions and regresses one keypoint. The resulting disentangled representations are able to attend to the keypoint regions, respectively, and thus the keypoint regression is spatially more accurate. We empirically show that the proposed direct regression method outperforms keypoint detection and grouping methods and achieves superior bottom-up pose estimation results on two benchmark datasets, COCO and CrowdPose. The code and models are available at https://github.com/HRNet/DEKR." + published_date: { + seconds: 1617667200 + } + authors: "Zigang Geng" + authors: "Ke Sun" + authors: "Bin Xiao" + authors: "Zhaoxiang Zhang" + authors: "Jingdong Wang" + repositories: { + is_official: true + url: "https://github.com/HRNet/DEKR" + owner: "HRNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 157 + description: "This is an official implementation of our CVPR 2021 paper \"Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression\" (https://arxiv.org/abs/2104.02300)" + } + methods: { + name: "Spatial Transformer" + full_name: "Spatial Transformer" + description: "A **Spatial Transformer** is an image model block that explicitly allows the spatial manipulation of data within a convolutional neural network. 
It gives CNNs the ability to actively spatially transform feature maps, conditional on the feature map itself, without any extra training supervision or modification to the optimisation process. Unlike pooling layers, where the receptive fields are fixed and local, the spatial transformer module is a dynamic mechanism that can actively spatially transform an image (or a feature map) by producing an appropriate transformation for each input sample. The transformation is then performed on the entire feature map (non-locally) and can include scaling, cropping, rotations, as well as non-rigid deformations.\r\n\r\nThe architecture is shown in the Figure to the right. The input feature map $U$ is passed to a localisation network which regresses the transformation parameters $\\theta$. The regular spatial grid $G$ over $V$ is transformed to the sampling grid $T\\_{\\theta}\\left(G\\right)$, which is applied to $U$, producing the warped output feature map $V$. The combination of the localisation network and sampling mechanism defines a spatial transformer."
+ }
+ }
+ papers: {
+ paper_id: "multi-person-pose-regression-via-pose"
+ title: "SMPR: Single-Stage Multi-Person Pose Regression"
+ arxiv_id: "2006.15576"
+ abstract: "Existing multi-person pose estimators can be roughly divided into two-stage approaches (top-down and bottom-up approaches) and one-stage approaches. The two-stage methods either suffer from high computational redundancy for additional person detectors or group keypoints heuristically after predicting all the instance-free keypoints. The recently proposed single-stage methods do not rely on the above two extra stages but have lower performance than the latest bottom-up approaches. In this work, a novel single-stage multi-person pose regression, termed SMPR, is presented. It follows the paradigm of dense prediction and predicts instance-aware keypoints from every location. Besides feature aggregation, we propose better strategies to define positive pose hypotheses for training, which all play an important role in dense pose estimation. The network also learns the scores of estimated poses. The pose scoring strategy further improves the pose estimation performance by prioritizing superior poses during non-maximum suppression (NMS). We show that our method not only outperforms existing single-stage methods but is also competitive with the latest bottom-up methods, with 70.2 AP and 77.5 AP75 on the COCO test-dev pose benchmark. Code is available at https://github.com/cmdi-dlut/SMPR."
+ published_date: {
+ seconds: 1593302400
+ }
+ authors: "Junqi Lin"
+ authors: "Huixin Miao"
+ authors: "Junjie Cao"
+ authors: "Zhixun Su"
+ authors: "Risheng Liu"
+ repositories: {
+ is_official: true
+ url: "https://github.com/cmdi-dlut/SMPR"
+ owner: "cmdi-dlut"
+ framework: FRAMEWORK_PYTORCH
+ number_of_stars: 5
+ }
+ }
+ papers: {
+ paper_id: "how-to-train-your-robust-human-pose-estimator"
+ title: "AID: Pushing the Performance Boundary of Human Pose Estimation with Information Dropping Augmentation"
+ arxiv_id: "2008.07139"
+ abstract: "Both appearance cue and constraint cue are vital for human pose estimation. However, there is a tendency in most existing works to overfit the former and overlook the latter. In this paper, we propose Augmentation by Information Dropping (AID) to verify and tackle this dilemma. 
Along with AID, as a prerequisite for effectively exploiting its potential, we propose customized training schedules, which are designed by analyzing the pattern of loss and performance in the training process from the perspective of information supplying. In experiments, as a model-agnostic approach, AID promotes various state-of-the-art methods in both bottom-up and top-down paradigms with different input sizes, frameworks, backbones, training and testing sets. On the popular COCO human pose estimation test set, AID consistently boosts the performance of different configurations by around 0.6 AP in the top-down paradigm and up to 1.5 AP in the bottom-up paradigm. On the more challenging CrowdPose dataset, the improvement is more than 1.5 AP. As AID successfully pushes the performance boundary of the human pose estimation problem by a considerable margin and sets a new state-of-the-art, we hope AID will become a regular configuration for training human pose estimators. The source code will be publicly available for further research."
+ published_date: {
+ seconds: 1597622400
+ }
+ authors: "Junjie Huang"
+ authors: "Zheng Zhu"
+ authors: "Guan Huang"
+ authors: "Dalong Du"
+ repositories: {
+ is_official: true
+ url: "https://github.com/HuangJunJie2017/UDP-Pose"
+ owner: "HuangJunJie2017"
+ framework: FRAMEWORK_OTHERS
+ number_of_stars: 218
+ description: " Official code of The Devil is in the Details: Delving into Unbiased Data Processing for Human Pose Estimation"
+ }
+ repositories: {
+ url: "https://github.com/open-mmlab/mmpose"
+ owner: "open-mmlab"
+ framework: FRAMEWORK_PYTORCH
+ number_of_stars: 1081
+ description: "OpenMMLab Pose Estimation Toolbox and Benchmark."
 }
 methods: {
 name: "ReLU"
@@ -25005,7 +25217,7 @@ pr_id_to_video: {
 url: "https://github.com/TeCSAR-UNCC/EfficientHRNet"
 owner: "TeCSAR-UNCC"
 framework: FRAMEWORK_PYTORCH
- number_of_stars: 26
+ number_of_stars: 29
 }
 methods: {
 name: "HRNet"
@@ -25061,8 +25273,8 @@ pr_id_to_video: {
 video: {
 video_id: "7AtbLCFlbNo"
 video_title: "PR-236: HigherHRNet: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation"
- number_of_likes: 19
- number_of_views: 1103
+ number_of_likes: 23
+ number_of_views: 1168
 published_date: {
 seconds: 1585757156
 }
@@ -25092,72 +25304,74 @@ pr_id_to_video: {
 authors: "Han Zhang"
 authors: "Colin Raffel"
 repositories: {
- url: "https://github.com/amazon-research/exponential-moving-average-normalization"
- owner: "amazon-research"
+ is_official: true
+ url: "https://github.com/google-research/fixmatch"
+ owner: "google-research"
+ framework: FRAMEWORK_TENSORFLOW
+ number_of_stars: 726
+ description: "A simple method to perform semi-supervised learning with limited data." 
+ } + repositories: { + url: "https://github.com/kekmodel/FixMatch-pytorch" + owner: "kekmodel" framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - description: "PyTorch implementation of EMAN for self-supervised and semi-supervised learning: https://arxiv.org/abs/2101.08482" + number_of_stars: 365 + description: "Unofficial PyTorch implementation of \"FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence\"" } repositories: { - url: "https://github.com/google-research/crest" - owner: "google-research" + url: "https://github.com/OFRIN/Tensorflow_FixMatch" + owner: "OFRIN" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 10 - description: "Repo for CReST: A Class-Rebalancing Self-Training Framework for Imbalanced Semi-Supervised Learning" + number_of_stars: 9 + description: "FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence" } repositories: { - url: "https://github.com/KaiyangZhou/ssdg-benchmark" - owner: "KaiyangZhou" + url: "https://github.com/valencebond/FixMatch_pytorch" + owner: "valencebond" framework: FRAMEWORK_PYTORCH - number_of_stars: 26 - description: "Benchmarks for semi-supervised domain generalization." + number_of_stars: 35 + description: "Unofficial PyTorch implementation of \"FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence\"" } repositories: { - url: "https://github.com/Celiali/FixMatch" - owner: "Celiali" + url: "https://github.com/CoinCheung/fixmatch" + owner: "CoinCheung" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "KTH Deep Learning advanced (DD2412) project. Task: Reproducing FixMatch and investigating on Noisy (Pseudo) Labels and confirmation Errors of FixMatch." + number_of_stars: 25 + description: "90%+ with 40 labels. please see the readme for details." } repositories: { - url: "https://github.com/Valentyn1997/semi-supervised-text-classification" - owner: "Valentyn1997" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/johnchenresearch/SSL" + owner: "johnchenresearch" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 1 - description: "Adaptation of FixMatch for Semi-supervised text classification " } repositories: { - url: "https://github.com/LeeDoYup/FixMatch-pytorch" - owner: "LeeDoYup" + url: "https://github.com/AhmadQasim/FixMatch" + owner: "AhmadQasim" framework: FRAMEWORK_PYTORCH - number_of_stars: 134 - description: "Unofficial Pytorch code for \"FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence\" in NeurIPS'20. This repo contains reproduced checkpoints." + description: "Forked from https://github.com/kekmodel/FixMatch-pytorch" } repositories: { - url: "https://github.com/gomezzz/MSMatch" - owner: "gomezzz" + url: "https://github.com/phanav/fixmatch-fastai" + owner: "phanav" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Code for the paper \"MSMatch: Semi-Supervised Multispectral Scene Classification with Few Labels\"" } repositories: { - url: "https://github.com/saranda-2811/FixMatch" - owner: "saranda-2811" - framework: FRAMEWORK_OTHERS + url: "https://github.com/GH3927/fixmatch_linemod_duck" + owner: "GH3927" + framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/A-Telfer/AugKey" - owner: "A-Telfer" - framework: FRAMEWORK_OTHERS - description: "RandAugment with Keypoints Annotation Support." + url: "https://github.com/CoinCheung/fixmatch-pytorch" + owner: "CoinCheung" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + description: "90%+ with 40 labels. 
please see the readme for details." } - repositories: { - is_official: true - url: "https://github.com/google-research/fixmatch" - owner: "google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 713 - description: "A simple method to perform semi-supervised learning with limited data." + methods: { + name: "FixMatch" + full_name: "FixMatch" + description: "FixMatch is an algorithm that first generates pseudo-labels using the model's predictions on weakly-augmented unlabeled images. For a given image, the pseudo-label is only retained if the model produces a high-confidence prediction. The model is then trained to predict the pseudo-label when fed a strongly-augmented version of the same image.\r\n\r\nDescription from: [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://paperswithcode.com/paper/fixmatch-simplifying-semi-supervised-learning)\r\n\r\nImage credit: [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://paperswithcode.com/paper/fixmatch-simplifying-semi-supervised-learning)" } } papers: { @@ -25197,7 +25411,7 @@ pr_id_to_video: { url: "https://github.com/yassouali/awesome-semi-supervised-learning" owner: "yassouali" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 698 + number_of_stars: 749 description: "😎 An up-to-date & curated list of awesome semi-supervised learning papers, methods & resources." } } @@ -25211,6 +25425,11 @@ pr_id_to_video: { } authors: "Leslie N. Smith" authors: "Adam Conovaloff" + methods: { + name: "FixMatch" + full_name: "FixMatch" + description: "FixMatch is an algorithm that first generates pseudo-labels using the model's predictions on weakly-augmented unlabeled images. For a given image, the pseudo-label is only retained if the model produces a high-confidence prediction. The model is then trained to predict the pseudo-label when fed a strongly-augmented version of the same image.\r\n\r\nDescription from: [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://paperswithcode.com/paper/fixmatch-simplifying-semi-supervised-learning)\r\n\r\nImage credit: [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://paperswithcode.com/paper/fixmatch-simplifying-semi-supervised-learning)" + } } papers: { paper_id: "a-simple-semi-supervised-learning-framework" @@ -25231,9 +25450,14 @@ pr_id_to_video: { url: "https://github.com/google-research/ssl_detection" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 214 + number_of_stars: 231 description: "Semi-supervised learning for object detection" } + methods: { + name: "STAC" + full_name: "STAC" + description: "**STAC** is a semi-supervised framework for visual object detection along with a data augmentation strategy. STAC deploys highly confident pseudo labels of localized objects from an unlabeled image and updates the model by enforcing consistency via strong augmentations. We generate pseudo labels (i.e., bounding boxes and their class labels) for unlabeled data using test-time inference, including NMS , of the teacher model trained with labeled data. We then compute unsupervised loss with respect to pseudo labels whose confidence scores are above a threshold $\\tau$ . The strong augmentations are applied for augmentation consistency during the model training. Target boxes are augmented when global geometric transformations are used." 
+ } } papers: { paper_id: "the-semi-supervised-inaturalist-aves" @@ -25245,6 +25469,14 @@ pr_id_to_video: { } authors: "Jong-Chyi Su" authors: "Subhransu Maji" + repositories: { + is_official: true + url: "https://github.com/cvl-umass/semi-inat-2020" + owner: "cvl-umass" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "Semi-Supervised Fine-Grained Recognition Challenge at FGVC7" + } } papers: { paper_id: "openmatch-open-set-consistency-regularization" @@ -25257,6 +25489,11 @@ pr_id_to_video: { authors: "Kuniaki Saito" authors: "Donghyun Kim" authors: "Kate Saenko" + methods: { + name: "FixMatch" + full_name: "FixMatch" + description: "FixMatch is an algorithm that first generates pseudo-labels using the model's predictions on weakly-augmented unlabeled images. For a given image, the pseudo-label is only retained if the model produces a high-confidence prediction. The model is then trained to predict the pseudo-label when fed a strongly-augmented version of the same image.\r\n\r\nDescription from: [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://paperswithcode.com/paper/fixmatch-simplifying-semi-supervised-learning)\r\n\r\nImage credit: [FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence](https://paperswithcode.com/paper/fixmatch-simplifying-semi-supervised-learning)" + } } papers: { paper_id: "milking-cowmask-for-semi-supervised-image" @@ -25273,7 +25510,7 @@ pr_id_to_video: { url: "https://github.com/google-research/google-research/tree/master/milking_cowmask" owner: "master" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 + number_of_stars: 18789 description: "Google Research" } repositories: { @@ -25281,7 +25518,7 @@ pr_id_to_video: { url: "https://github.com/google-research/google-research" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 + number_of_stars: 18790 description: "Google Research" } methods: { @@ -25353,7 +25590,7 @@ pr_id_to_video: { url: "https://github.com/LiheYoung/ST-PlusPlus" owner: "LiheYoung" framework: FRAMEWORK_PYTORCH - number_of_stars: 15 + number_of_stars: 18 description: "ST++: Make Self-training Work Better for Semi-supervised Semantic Segmentation" } } @@ -25361,7 +25598,7 @@ pr_id_to_video: { video_id: "fOCxgrR95ew" video_title: "PR-237: FixMatch: Simplifying Semi-Supervised Learning with Consistency and Confidence" number_of_likes: 31 - number_of_views: 1950 + number_of_views: 2006 published_date: { seconds: 1586095079 } @@ -25402,7 +25639,7 @@ pr_id_to_video: { url: "https://github.com/arraystream/fftoptionlib" owner: "arraystream" framework: FRAMEWORK_OTHERS - number_of_stars: 37 + number_of_stars: 38 description: "FFT-based Option Pricing Methods in Python" } } @@ -25418,26 +25655,6 @@ pr_id_to_video: { authors: "Daniel Tarlow" authors: "Marc Brockschmidt" authors: "Richard Zemel" - repositories: { - url: "https://github.com/chingyaoc/ggnn.pytorch" - owner: "chingyaoc" - framework: FRAMEWORK_PYTORCH - number_of_stars: 375 - description: "A PyTorch Implementation of Gated Graph Sequence Neural Networks (GGNN)" - } - repositories: { - url: "https://github.com/fau-is/grm" - owner: "fau-is" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 - } - repositories: { - url: "https://github.com/Microsoft/graph-partition-neural-network-samples" - owner: "Microsoft" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 80 - description: "Sample Code for Graph Partition Neural Networks" - } repositories: { url: 
"https://github.com/bdqnghi/bi-tbcnn" owner: "bdqnghi" @@ -25449,30 +25666,50 @@ pr_id_to_video: { url: "https://github.com/Microsoft/gated-graph-neural-network-samples" owner: "Microsoft" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 968 + number_of_stars: 970 description: "Sample Code for Gated Graph Neural Networks" } repositories: { url: "https://github.com/JamesChuanggg/ggnn.pytorch" owner: "JamesChuanggg" framework: FRAMEWORK_PYTORCH - number_of_stars: 375 + number_of_stars: 382 description: "A PyTorch Implementation of Gated Graph Sequence Neural Networks (GGNN)" } repositories: { url: "https://github.com/entslscheia/GGNN_Reasoning" owner: "entslscheia" framework: FRAMEWORK_PYTORCH - number_of_stars: 31 + number_of_stars: 32 description: "PyTorch implementation for Graph Gated Neural Network (for Knowledge Graphs)" } repositories: { url: "https://github.com/yujiali/ggnn" owner: "yujiali" framework: FRAMEWORK_OTHERS - number_of_stars: 243 + number_of_stars: 244 description: "Gated Graph Sequence Neural Networks" } + repositories: { + url: "https://github.com/fau-is/grm" + owner: "fau-is" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + } + repositories: { + url: "https://github.com/Microsoft/graph-partition-neural-network-samples" + owner: "Microsoft" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 82 + description: "Sample Code for Graph Partition Neural Networks" + } + repositories: { + url: "https://github.com/chingyaoc/ggnn.pytorch" + owner: "chingyaoc" + framework: FRAMEWORK_PYTORCH + number_of_stars: 382 + description: "A PyTorch Implementation of Gated Graph Sequence Neural Networks (GGNN)" + } methods: { name: "GGS-NNs" full_name: "Gated Graph Sequence Neural Networks" @@ -25490,21 +25727,21 @@ pr_id_to_video: { authors: "Daniel Beck" authors: "Gholamreza Haffari" authors: "Trevor Cohn" - repositories: { - url: "https://github.com/Cartus/DCGCN" - owner: "Cartus" - framework: FRAMEWORK_OTHERS - number_of_stars: 60 - description: "Densely Connected Graph Convolutional Networks for Graph-to-Sequence Learning (authors' MXNet implementation for the TACL19 paper)" - } repositories: { is_official: true url: "https://github.com/beckdaniel/acl2018_graph2seq" owner: "beckdaniel" framework: FRAMEWORK_OTHERS - number_of_stars: 113 + number_of_stars: 116 description: "Code for \"Graph-to-Sequence Learning using Gated Graph Neural Networks\"" } + repositories: { + url: "https://github.com/Cartus/DCGCN" + owner: "Cartus" + framework: FRAMEWORK_OTHERS + number_of_stars: 61 + description: "Densely Connected Graph Convolutional Networks for Graph-to-Sequence Learning (authors' MXNet implementation for the TACL19 paper)" + } } papers: { paper_id: "empirical-evaluation-of-gated-recurrent" @@ -25518,20 +25755,6 @@ pr_id_to_video: { authors: "Caglar Gulcehre" authors: "KyungHyun Cho" authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/pushpendughosh/Stock-market-forecasting" - owner: "pushpendughosh" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 137 - description: "Forecasting directional movements of stock prices for intraday trading using LSTM and random forest" - } - repositories: { - is_official: true - url: "https://github.com/jych/librnn" - owner: "jych" - framework: FRAMEWORK_OTHERS - number_of_stars: 75 - } repositories: { url: "https://github.com/michaelfarrell76/End-To-End-Generative-Dialogue" owner: "michaelfarrell76" @@ -25552,6 +25775,26 @@ pr_id_to_video: { number_of_stars: 1 description: "Build deep learning models more efficient based on 
TensorFlow." } + repositories: { + is_official: true + url: "https://github.com/jych/librnn" + owner: "jych" + framework: FRAMEWORK_OTHERS + number_of_stars: 75 + } + repositories: { + url: "https://github.com/pushpendughosh/Stock-market-forecasting" + owner: "pushpendughosh" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 150 + description: "Forecasting directional movements of stock prices for intraday trading using LSTM and random forest" + } + repositories: { + url: "https://github.com/ratschlab/HIRID-ICU-Benchmark" + owner: "ratschlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + } methods: { name: "Tanh Activation" full_name: "Tanh Activation" @@ -25601,18 +25844,11 @@ pr_id_to_video: { authors: "Quoc Le" authors: "Geoffrey Hinton" authors: "Jeff Dean" - repositories: { - url: "https://github.com/jsuarez5341/Efficient-Dynamic-Batching" - owner: "jsuarez5341" - framework: FRAMEWORK_PYTORCH - number_of_stars: 31 - description: "Solves AI, transcends reality, infiltrates your mind" - } repositories: { url: "https://github.com/davidmrau/mixture-of-experts" owner: "davidmrau" framework: FRAMEWORK_PYTORCH - number_of_stars: 119 + number_of_stars: 125 description: "PyTorch Re-Implementation of \"The Sparsely-Gated Mixture-of-Experts Layer\" by Noam Shazeer et al. https://arxiv.org/abs/1701.06538" } repositories: { @@ -25622,6 +25858,13 @@ pr_id_to_video: { number_of_stars: 7 description: "Biological-Scale Neural Networks" } + repositories: { + url: "https://github.com/jsuarez5341/Efficient-Dynamic-Batching" + owner: "jsuarez5341" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "Solves AI, transcends reality, infiltrates your mind" + } methods: { name: "Tanh Activation" full_name: "Tanh Activation" @@ -25703,6 +25946,11 @@ pr_id_to_video: { number_of_stars: 78 description: "Python implementation of GLN in different frameworks" } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } methods: { name: "ReLU" full_name: "Rectified Linear Units" @@ -25712,7 +25960,7 @@ pr_id_to_video: { video: { video_id: "7iJqmbn8POU" video_title: "PR-238: Learning in Gated Neural Networks" - number_of_views: 383 + number_of_views: 390 published_date: { seconds: 1586704908 } @@ -25742,7 +25990,7 @@ pr_id_to_video: { url: "https://github.com/lmzintgraf/varibad" owner: "lmzintgraf" framework: FRAMEWORK_PYTORCH - number_of_stars: 69 + number_of_stars: 77 description: "Implementation of VariBAD: A Very Good Method for Bayes-Adaptive Deep RL via Meta-Learning - Zintgraf et al. 
(ICLR 2020)" } } @@ -25759,16 +26007,6 @@ pr_id_to_video: { authors: "Deirdre Quillen" authors: "Chelsea Finn" authors: "Sergey Levine" - repositories: { - url: "https://github.com/waterhorse1/Pearl_relabel" - owner: "waterhorse1" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/lujiayou123/Off-Policy-Meta-Reinforcement-Learning-via-Unsupervised-Domain-Translation" - owner: "lujiayou123" - framework: FRAMEWORK_OTHERS - } repositories: { url: "https://github.com/victorchan314/cs287_final_project" owner: "victorchan314" @@ -25779,9 +26017,19 @@ pr_id_to_video: { url: "https://github.com/katerakelly/oyster" owner: "katerakelly" framework: FRAMEWORK_PYTORCH - number_of_stars: 320 + number_of_stars: 321 description: "Implementation of Efficient Off-policy Meta-learning via Probabilistic Context Variables (PEARL)" } + repositories: { + url: "https://github.com/lujiayou123/Off-Policy-Meta-Reinforcement-Learning-via-Unsupervised-Domain-Translation" + owner: "lujiayou123" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/waterhorse1/Pearl_relabel" + owner: "waterhorse1" + framework: FRAMEWORK_OTHERS + } } papers: { paper_id: "meld-meta-reinforcement-learning-from-images" @@ -25801,7 +26049,7 @@ pr_id_to_video: { url: "https://github.com/tonyzhaozh/meld" owner: "tonyzhaozh" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 34 + number_of_stars: 35 description: "MELD: Meta-Reinforcement Learning from Images via Latent State Models https://arxiv.org/abs/2010.13957" } } @@ -25870,12 +26118,19 @@ pr_id_to_video: { authors: "Yingfeng Chen" authors: "Changjie Fan" authors: "Chongjie Zhang" + repositories: { + is_official: true + url: "https://github.com/NagisaZj/MetaCURE-Public" + owner: "NagisaZj" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } } video: { video_id: "phi7_QIhfJ4" video_title: "PR-239: Meta Reinforcement Learning as Task Inference" number_of_likes: 19 - number_of_views: 954 + number_of_views: 974 published_date: { seconds: 1586703101 } @@ -25902,7 +26157,7 @@ pr_id_to_video: { url: "https://github.com/hejingwenhejingwen/AdaFM" owner: "hejingwenhejingwen" framework: FRAMEWORK_PYTORCH - number_of_stars: 155 + number_of_stars: 156 description: "CVPR2019 (oral) Modulating Image Restoration with Continual Levels via Adaptive Feature Modification Layers (AdaFM). PyTorch implementation" } } @@ -25940,7 +26195,7 @@ pr_id_to_video: { url: "https://github.com/hejingwenhejingwen/CResMD" owner: "hejingwenhejingwen" framework: FRAMEWORK_PYTORCH - number_of_stars: 75 + number_of_stars: 78 description: "(ECCV 2020) Interactive Multi-Dimension Modulation with Dynamic Controllable Residual Learning for Image Restoration" } methods: { @@ -26010,7 +26265,7 @@ pr_id_to_video: { url: "https://github.com/hejingwenhejingwen/CSRNet" owner: "hejingwenhejingwen" framework: FRAMEWORK_PYTORCH - number_of_stars: 42 + number_of_stars: 47 description: "(ECCV 2020) Conditional Sequential Modulation for Efficient Global Image Retouching" } } @@ -26053,7 +26308,7 @@ pr_id_to_video: { url: "https://github.com/zhaohengyuan1/PAN" owner: "zhaohengyuan1" framework: FRAMEWORK_PYTORCH - number_of_stars: 192 + number_of_stars: 198 description: "(ECCV2020 Workshops) Efficient Image Super-Resolution Using Pixel Attention." 
} methods: { @@ -26065,8 +26320,8 @@ pr_id_to_video: { video: { video_id: "WXGqYbKQzWY" video_title: "PR-240: Modulating Image Restoration with Continual Levels via Adaptive Feature Modification Layers" - number_of_likes: 7 - number_of_views: 361 + number_of_likes: 8 + number_of_views: 371 published_date: { seconds: 1587309229 } @@ -26090,70 +26345,70 @@ pr_id_to_video: { authors: "Dequan Wang" authors: "Philipp Krähenbühl" repositories: { - url: "https://github.com/lee-man/movenet" - owner: "lee-man" + url: "https://github.com/Lakerszjb/CentereNet-C" + owner: "Lakerszjb" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Un-official implementation of MoveNet from Google" } repositories: { - url: "https://github.com/Kacper777777/simple-keras-CenterNet" - owner: "Kacper777777" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "A simplified and unofficial Keras implementation of CenterNet." + url: "https://github.com/MaximKuklin/3D_Object_Detection_Diploma" + owner: "MaximKuklin" + framework: FRAMEWORK_PYTORCH + description: "HSE Diploma 2021" } repositories: { - url: "https://github.com/ximilar-com/xcenternet" - owner: "ximilar-com" + url: "https://github.com/xggIoU/centernet_tensorflow_wilderface_voc" + owner: "xggIoU" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 131 - description: "Fast anchor free Object Detection based on CenterNet (Objects As Points) and TTFNet (Training-Time-Friendly Network). Implemented in TensorFlow 2.4+." + number_of_stars: 103 + description: "This is the unofficial implementation of the \"CenterNet:Objects as Points\".Just a simple try with self-modified shufflenetv2 and yolov3.If you want better results, you need more experiments." } repositories: { - url: "https://github.com/vietanhdev/open-adas" - owner: "vietanhdev" - framework: FRAMEWORK_OTHERS - number_of_stars: 121 - description: "An open source advanced driver assistance system (ADAS) that uses Jetson Nano as the hardware. Features: Traffic sign detection, Forward collision warning, Lane departure warning." + url: "https://github.com/Khalifa-2020/centernet2" + owner: "Khalifa-2020" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } repositories: { - url: "https://github.com/zgcr/simpleAICV-pytorch-ImageNet-COCO-training" - owner: "zgcr" - framework: FRAMEWORK_PYTORCH - number_of_stars: 162 - description: "Training examples and results for ImageNet(ILSVRC2012)/COCO2017/VOC2007+VOC2012 datasets.Include ResNet/DarkNet/RegNet/RetinaNet/FCOS/CenterNet/YOLO series." 
+ url: "https://github.com/xuannianz/keras-CenterNet" + owner: "xuannianz" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 159 + description: "CenterNet (Objects as Points) implementation in Keras and Tensorflow" } repositories: { - url: "https://github.com/MaximKuklin/3D_Object_Detection_Diploma" - owner: "MaximKuklin" + url: "https://github.com/tensorboy/centerpose" + owner: "tensorboy" framework: FRAMEWORK_PYTORCH - description: "HSE Diploma 2021" + number_of_stars: 214 + description: "Push the Extreme of the pose estimation" } repositories: { - url: "https://github.com/JungminChung/Korean_Struc_Det" - owner: "JungminChung" + url: "https://github.com/zhaott618/centernet_codes_notes" + owner: "zhaott618" framework: FRAMEWORK_PYTORCH - description: "IRC에서 주관하는 '한국 건축물 검출 알고리즘 대회'를 위한 Repo" + number_of_stars: 3 + description: "centernet codes notes while learning" } repositories: { - url: "https://github.com/developer0hye/Simple-CenterNet" - owner: "developer0hye" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "PyTorch Implementation of CenterNet(Object as Points)" + url: "https://github.com/Stick-To/CenterNet-tensorflow" + owner: "Stick-To" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 131 + description: "CenterNet: Objects as Points in Tensorflow" } repositories: { - url: "https://github.com/tiagoCuervo/JapaNet" - owner: "tiagoCuervo" + url: "https://github.com/see--/keras-centernet" + owner: "see--" framework: FRAMEWORK_TENSORFLOW - description: "Detection and classification of Kuzushiji characters for the Kuzushiji Recognition Kaggle challenge using CenterNet as detector and multiple classifiers" + number_of_stars: 317 + description: "A Keras implementation of CenterNet with pre-trained model (unofficial)" } repositories: { - url: "https://github.com/wL1u/CenterNet-VisDrone" - owner: "wL1u" + url: "https://github.com/tteepe/CenterNet-pytorch-lightning" + owner: "tteepe" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 + number_of_stars: 18 + description: "Refactored implementation of CenterNet (Objects as Points - Zhou, Xingyi et. al.) shipping with PyTorch Lightning modules" } } papers: { @@ -26167,20 +26422,21 @@ pr_id_to_video: { authors: "Xingyi Zhou" authors: "Vladlen Koltun" authors: "Philipp Krähenbühl" - repositories: { - url: "https://github.com/Feynman1999/MgeEditing" - owner: "Feynman1999" - framework: FRAMEWORK_OTHERS - number_of_stars: 20 - description: "MgeEditing is an open source image and video editing toolbox based on MegEngine." - } repositories: { url: "https://github.com/JialianW/TraDeS" owner: "JialianW" framework: FRAMEWORK_PYTORCH - number_of_stars: 330 + number_of_stars: 350 description: "Track to Detect and Segment: An Online Multi-Object Tracker (CVPR 2021)" } + repositories: { + is_official: true + url: "https://github.com/xingyizhou/CenterTrack" + owner: "xingyizhou" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1755 + description: "Simultaneous object detection and tracking using center points." + } repositories: { url: "https://github.com/hampen2929/CenterTrack" owner: "hampen2929" @@ -26195,12 +26451,11 @@ pr_id_to_video: { description: "Self-supervised learning for Multi-Object Tracking with contrastive learning, keypoint estimation, and constraint dominant set clustering" } repositories: { - is_official: true - url: "https://github.com/xingyizhou/CenterTrack" - owner: "xingyizhou" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1713 - description: "Simultaneous object detection and tracking using center points." 
+ url: "https://github.com/Feynman1999/MgeEditing" + owner: "Feynman1999" + framework: FRAMEWORK_OTHERS + number_of_stars: 22 + description: "MgeEditing is an open source image and video editing toolbox based on MegEngine." } } papers: { @@ -26215,55 +26470,60 @@ pr_id_to_video: { authors: "Xingyi Zhou" authors: "Philipp Krähenbühl" repositories: { - url: "https://github.com/tianweiy/CenterPoint-KITTI" + is_official: true + url: "https://github.com/tianweiy/CenterPoint" owner: "tianweiy" framework: FRAMEWORK_PYTORCH - number_of_stars: 54 + number_of_stars: 761 } repositories: { - url: "https://github.com/darrenjkt/CenterPoint" - owner: "darrenjkt" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - } - repositories: { - url: "https://github.com/CarkusL/CenterPoint" - owner: "CarkusL" + url: "https://github.com/chowkamlee81/CentrePointNet" + owner: "chowkamlee81" framework: FRAMEWORK_PYTORCH - number_of_stars: 37 - description: "Export CenterPoint PonintPillars ONNX Model For TensorRT" + number_of_stars: 7 } repositories: { - url: "https://github.com/abhigoku10/CenterPoint_PC" - owner: "abhigoku10" + url: "https://github.com/zion-king/Center-based-3D-Object-Detection-and-Tracking" + owner: "zion-king" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 2 } repositories: { url: "https://github.com/open-mmlab/mmdetection3d" owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 1326 + number_of_stars: 1419 description: "OpenMMLab's next-generation platform for general 3D object detection." } repositories: { - url: "https://github.com/zion-king/Center-based-3D-Object-Detection-and-Tracking" - owner: "zion-king" + url: "https://github.com/abhigoku10/CenterPoint_PC" + owner: "abhigoku10" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 1 } repositories: { - url: "https://github.com/chowkamlee81/CentrePointNet" - owner: "chowkamlee81" + url: "https://github.com/CarkusL/CenterPoint" + owner: "CarkusL" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 + number_of_stars: 50 + description: "Export CenterPoint PonintPillars ONNX Model For TensorRT" } repositories: { - is_official: true - url: "https://github.com/tianweiy/CenterPoint" + url: "https://github.com/darrenjkt/CenterPoint" + owner: "darrenjkt" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + } + repositories: { + url: "https://github.com/tianweiy/CenterPoint-KITTI" owner: "tianweiy" framework: FRAMEWORK_PYTORCH - number_of_stars: 701 + number_of_stars: 67 + } + methods: { + name: "CenterPoint" + full_name: "CenterPoint" + description: "**CenterPoint** is a two-stage 3D detector that finds centers of objects and their properties using a keypoint detector and regresses to other attributes, including 3D size, 3D orientation and velocity. In a second-stage, it refines these estimates using additional point features on the object. CenterPoint uses a standard Lidar-based backbone network, i.e., VoxelNet or PointPillars, to build a representation of the input point-cloud. CenterPoint predicts the relative offset (velocity) of objects between consecutive frames, which are then linked up greedily -- so in Centerpoint, 3D object tracking simplifies to greedy closest-point matching." 
} } papers: { @@ -26287,7 +26547,7 @@ pr_id_to_video: { url: "https://github.com/detectRecog/PointTrack" owner: "detectRecog" framework: FRAMEWORK_PYTORCH - number_of_stars: 218 + number_of_stars: 219 description: "PointTrack (ECCV2020 ORAL): Segment as Points for Efficient Online Multi-Object Tracking and Segmentation" } methods: { @@ -26325,8 +26585,8 @@ pr_id_to_video: { video: { video_id: "mDdpwe2xsT4" video_title: "PR-241: Objects as Points" - number_of_likes: 48 - number_of_views: 3459 + number_of_likes: 52 + number_of_views: 3657 published_date: { seconds: 1587909186 } @@ -26357,7 +26617,7 @@ pr_id_to_video: { url: "https://github.com/wuch15/PTUM" owner: "wuch15" framework: FRAMEWORK_OTHERS - number_of_stars: 7 + number_of_stars: 9 description: "Resources of \"PTUM: Pre-training User Model from Unlabeled User Behaviors via Self-supervision\"" } } @@ -26429,7 +26689,7 @@ pr_id_to_video: { video_id: "d2IaWtBbJjg" video_title: "PR-242: BERT4Rec -Sequential Recommendation with BERT" number_of_likes: 12 - number_of_views: 859 + number_of_views: 928 published_date: { seconds: 1593349958 } @@ -26455,75 +26715,74 @@ pr_id_to_video: { authors: "Kaiming He" authors: "Piotr Dollár" repositories: { - is_official: true - url: "https://github.com/facebookresearch/pycls" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1648 - description: "Codebase for Image Classification Research, written in PyTorch." + url: "https://github.com/PaperCodeReview/RegNet-TF" + owner: "PaperCodeReview" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "TF 2.x implementation of RegNet (Designing Network Design Spaces, CVPR 2020)." } repositories: { - url: "https://github.com/facebookresearch/vissl" - owner: "facebookresearch" + url: "https://github.com/Alessiacosmos/RegNet-pytorch" + owner: "Alessiacosmos" framework: FRAMEWORK_PYTORCH - number_of_stars: 1788 - description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." + number_of_stars: 1 + description: "A pytorch implement of RegNet (Designing Netowrk design spaces). Original paper link: https://arxiv.org/pdf/2003.13678.pdf" } repositories: { url: "https://github.com/PaddlePaddle/PaddleClas" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 2085 + number_of_stars: 2166 description: "A treasure chest for visual recognition powered by PaddlePaddle" } repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" + url: "https://github.com/j3soon/arxiv-utils" + owner: "j3soon" framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" + number_of_stars: 17 + description: "Meaningful titles for tabs and PDF downloads!" } repositories: { - url: "https://github.com/Alessiacosmos/Basic-RegNet-pytorch" - owner: "Alessiacosmos" + url: "https://github.com/rwightman/pytorch-image-models" + owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A pytorch implement of RegNet (Designing Netowrk design spaces). 
Original paper link: https://arxiv.org/pdf/2003.13678.pdf" + number_of_stars: 12196 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } repositories: { - url: "https://github.com/open-mmlab/mmdetection3d" - owner: "open-mmlab" + url: "https://github.com/facebookresearch/vissl" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1326 - description: "OpenMMLab's next-generation platform for general 3D object detection." + number_of_stars: 1878 + description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." } repositories: { url: "https://github.com/open-mmlab/mmdetection" owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 + number_of_stars: 16041 description: "OpenMMLab Detection Toolbox and Benchmark" } repositories: { - url: "https://github.com/PaperCodeReview/RegNet-TF" - owner: "PaperCodeReview" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "TF 2.x implementation of RegNet (Designing Network Design Spaces, CVPR 2020)." + url: "https://github.com/open-mmlab/mmdetection3d" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1419 + description: "OpenMMLab's next-generation platform for general 3D object detection." } repositories: { - url: "https://github.com/Alessiacosmos/RegNet-pytorch" + url: "https://github.com/Alessiacosmos/Basic-RegNet-pytorch" owner: "Alessiacosmos" framework: FRAMEWORK_PYTORCH number_of_stars: 1 description: "A pytorch implement of RegNet (Designing Netowrk design spaces). Original paper link: https://arxiv.org/pdf/2003.13678.pdf" } repositories: { - url: "https://github.com/j3soon/arxiv-utils" - owner: "j3soon" + url: "https://github.com/osmr/imgclsmob" + owner: "osmr" framework: FRAMEWORK_OTHERS - number_of_stars: 17 - description: "Meaningful titles for tabs and PDF downloads!" + number_of_stars: 2268 + description: "Sandbox for training deep learning networks" } methods: { name: "Weight Decay" @@ -26589,20 +26848,20 @@ pr_id_to_video: { authors: "Saining Xie" authors: "Wan-Yen Lo" authors: "Piotr Dollár" - repositories: { - url: "https://github.com/facebookresearch/pycls" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1648 - description: "Codebase for Image Classification Research, written in PyTorch." - } repositories: { url: "https://github.com/facebookresearch/nds" owner: "facebookresearch" framework: FRAMEWORK_OTHERS - number_of_stars: 83 + number_of_stars: 85 description: "On Network Design Spaces for Visual Recognition" } + repositories: { + url: "https://github.com/facebookresearch/pycls" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1674 + description: "Codebase for Image Classification Research, written in PyTorch." 
+ } methods: { name: "LSTM" full_name: "Long Short-Term Memory" @@ -26665,6 +26924,14 @@ pr_id_to_video: { authors: "Jiaxuan You" authors: "Rex Ying" authors: "Jure Leskovec" + repositories: { + is_official: true + url: "https://github.com/snap-stanford/graphgym" + owner: "snap-stanford" + framework: FRAMEWORK_PYTORCH + number_of_stars: 624 + description: "Platform for designing and evaluating Graph Neural Networks (GNN)" + } } papers: { paper_id: "network-space-search-for-pareto-efficient" @@ -26707,7 +26974,7 @@ pr_id_to_video: { url: "https://github.com/ThomasMrY/ActivationFunctionDemo" owner: "ThomasMrY" framework: FRAMEWORK_PYTORCH - number_of_stars: 13 + number_of_stars: 14 description: "The \"Activation Function Demo\" is a demo for implementing activation function with the mathod propsed in paper: Design Space Exploration of Neural Network Activation Function Circuits" } } @@ -26754,8 +27021,8 @@ pr_id_to_video: { video: { video_id: "bnbKQRae_u4" video_title: "PR-243: Designing Network Design Spaces" - number_of_likes: 28 - number_of_views: 1578 + number_of_likes: 32 + number_of_views: 1653 published_date: { seconds: 1588515773 } @@ -26782,13 +27049,6 @@ pr_id_to_video: { authors: "Michal Irani" authors: "William T. Freeman" authors: "Tali Dekel" - repositories: { - url: "https://github.com/ChristophReich1996/Semantic_Pyramid_for_Image_Generation" - owner: "ChristophReich1996" - framework: FRAMEWORK_PYTORCH - number_of_stars: 41 - description: "PyTorch reimplementation of the paper: \"Semantic Pyramid for Image Generation\" [CVPR 2020]." - } repositories: { url: "https://github.com/rosinality/semantic-pyramid-pytorch" owner: "rosinality" @@ -26796,6 +27056,13 @@ pr_id_to_video: { number_of_stars: 33 description: "Implementation of Semantic Pyramid for Image Generation (https://arxiv.org/abs/2003.06221) in PyTorch" } + repositories: { + url: "https://github.com/ChristophReich1996/Semantic_Pyramid_for_Image_Generation" + owner: "ChristophReich1996" + framework: FRAMEWORK_PYTORCH + number_of_stars: 42 + description: "PyTorch reimplementation of the paper: \"Semantic Pyramid for Image Generation\" [CVPR 2020]." 
+ } } papers: { paper_id: "generator-pyramid-for-high-resolution-image" @@ -26850,7 +27117,7 @@ pr_id_to_video: { url: "https://github.com/JinshuChen/MOGAN" owner: "JinshuChen" framework: FRAMEWORK_PYTORCH - number_of_stars: 64 + number_of_stars: 65 } methods: { name: "GAN" @@ -26883,7 +27150,7 @@ pr_id_to_video: { url: "https://github.com/tom-roddick/mono-semantic-maps" owner: "tom-roddick" framework: FRAMEWORK_OTHERS - number_of_stars: 67 + number_of_stars: 77 } } papers: { @@ -26916,26 +27183,20 @@ pr_id_to_video: { authors: "Hongyang Chao" authors: "Baining Guo" repositories: { - url: "https://github.com/kahnchana/pennet" - owner: "kahnchana" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/qxdnfsy/PEN-Net-Keras-Img_Inpainting" + owner: "qxdnfsy" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Image Inpainting PEN-Net CVPR-2019 Keras" } repositories: { is_official: true url: "https://github.com/researchmm/PEN-Net-for-Inpainting" owner: "researchmm" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 234 + number_of_stars: 247 description: "[CVPR'2019] PEN-Net: Learning Pyramid-Context Encoder Network for High-Quality Image Inpainting " } - repositories: { - url: "https://github.com/qxdnfsy/PEN-Net-Keras-Img_Inpainting" - owner: "qxdnfsy" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Image Inpainting PEN-Net CVPR-2019 Keras" - } methods: { name: "Convolution" full_name: "Convolution" @@ -26979,8 +27240,8 @@ pr_id_to_video: { video: { video_id: "b1xoR4utQ3k" video_title: "PR-244: Semantic Pyramid for Image Generation" - number_of_likes: 8 - number_of_views: 659 + number_of_likes: 9 + number_of_views: 665 published_date: { seconds: 1588519423 } @@ -27016,7 +27277,7 @@ pr_id_to_video: { url: "https://github.com/divelab/MoleculeProp" owner: "divelab" framework: FRAMEWORK_PYTORCH - number_of_stars: 50 + number_of_stars: 55 } methods: { name: "Batch Normalization" @@ -27101,7 +27362,7 @@ pr_id_to_video: { url: "https://github.com/wjm41/soapgp" owner: "wjm41" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 13 + number_of_stars: 14 description: "Molecular Property Prediction using GP with a SOAP kernel" } } @@ -27174,7 +27435,7 @@ pr_id_to_video: { url: "https://github.com/learningmatter-mit/NeuralForceField" owner: "learningmatter-mit" framework: FRAMEWORK_PYTORCH - number_of_stars: 22 + number_of_stars: 23 description: "Neural Network Force Field based on PyTorch" } } @@ -27182,7 +27443,7 @@ pr_id_to_video: { video_id: "I-ARmyiRd1A" video_title: "PR-245: A deep learning approach to antibiotics discovery" number_of_likes: 13 - number_of_views: 538 + number_of_views: 539 published_date: { seconds: 1589119956 } @@ -27326,28 +27587,28 @@ pr_id_to_video: { url: "https://github.com/brain-research/realistic-ssl-evaluation" owner: "brain-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 405 + number_of_stars: 413 description: "Open source release of the evaluation benchmark suite described in \"Realistic Evaluation of Deep Semi-Supervised Learning Algorithms\"" } repositories: { url: "https://github.com/siit-vtt/semi-supervised-learning-pytorch" owner: "siit-vtt" framework: FRAMEWORK_PYTORCH - number_of_stars: 57 + number_of_stars: 58 description: "Several SSL methods (Pi model, Mean Teacher) are implemented in pytorch" } repositories: { url: "https://github.com/perrying/realistic-ssl-evaluation-pytorch" owner: "perrying" framework: FRAMEWORK_PYTORCH - number_of_stars: 65 + number_of_stars: 66 description: 
"Reimplementation of \"Realistic Evaluation of Deep Semi-Supervised Learning Algorithms\"" } repositories: { url: "https://github.com/siit-vtt/ssl" owner: "siit-vtt" framework: FRAMEWORK_PYTORCH - number_of_stars: 57 + number_of_stars: 58 description: "Several SSL methods (Pi model, Mean Teacher) are implemented in pytorch" } repositories: { @@ -27399,6 +27660,14 @@ pr_id_to_video: { } authors: "Jong-Chyi Su" authors: "Subhransu Maji" + repositories: { + is_official: true + url: "https://github.com/cvl-umass/semi-inat-2020" + owner: "cvl-umass" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "Semi-Supervised Fine-Grained Recognition Challenge at FGVC7" + } } papers: { paper_id: "recent-deep-semi-supervised-learning" @@ -27427,7 +27696,7 @@ pr_id_to_video: { url: "https://github.com/uvavision/Curriculum-Labeling" owner: "uvavision" framework: FRAMEWORK_PYTORCH - number_of_stars: 53 + number_of_stars: 57 description: "[AAAI 21] Curriculum Labeling: Revisiting Pseudo-Labeling for Semi-Supervised Learning" } } @@ -27513,12 +27782,20 @@ pr_id_to_video: { authors: "Giorgos Tolias" authors: "Yannis Avrithis" authors: "Ondrej Chum" + repositories: { + is_official: true + url: "https://github.com/ahmetius/LP-DeepSSL" + owner: "ahmetius" + framework: FRAMEWORK_PYTORCH + number_of_stars: 70 + description: "Code for CVPR 2019 paper Label Propagation for Deep Semi-supervised Learning" + } } video: { video_id: "9w9_sk9r6hk" video_title: "PR-247: Realistic Evaluation of Deep Semi-Supervised Learning Algorithms" number_of_likes: 8 - number_of_views: 553 + number_of_views: 556 published_date: { seconds: 1589724627 } @@ -27542,27 +27819,27 @@ pr_id_to_video: { authors: "Alex Andonian" authors: "Aude Oliva" authors: "Antonio Torralba" - repositories: { - url: "https://github.com/zhoubolei/TRN-pytorch" - owner: "zhoubolei" - framework: FRAMEWORK_PYTORCH - number_of_stars: 727 - description: "Temporal Relation Networks" - } repositories: { url: "https://github.com/metalbubble/TRN-pytorch" owner: "metalbubble" framework: FRAMEWORK_PYTORCH - number_of_stars: 727 + number_of_stars: 732 description: "Temporal Relation Networks" } repositories: { url: "https://github.com/okankop/MFF-pytorch" owner: "okankop" framework: FRAMEWORK_PYTORCH - number_of_stars: 120 + number_of_stars: 121 description: "Motion Fused Frames implementation in PyTorch, codes and pretrained models." 
} + repositories: { + url: "https://github.com/zhoubolei/TRN-pytorch" + owner: "zhoubolei" + framework: FRAMEWORK_PYTORCH + number_of_stars: 732 + description: "Temporal Relation Networks" + } methods: { name: "Convolution" full_name: "Convolution" @@ -27591,7 +27868,7 @@ pr_id_to_video: { url: "https://github.com/redwang/DTGRM" owner: "redwang" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 + number_of_stars: 8 description: "Temporal Relational Modeling with Self-Supervision for Action Segmentation" } methods: { @@ -27654,18 +27931,18 @@ pr_id_to_video: { authors: "Majid Mirmehdi" authors: "Dima Damen" repositories: { - url: "https://github.com/tobyperrett/few-shot-action-recognition" + is_official: true + url: "https://github.com/tobyperrett/trx" owner: "tobyperrett" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 48 + description: "Temporal-Relational CrossTransformers" } repositories: { - is_official: true - url: "https://github.com/tobyperrett/trx" + url: "https://github.com/tobyperrett/few-shot-action-recognition" owner: "tobyperrett" framework: FRAMEWORK_PYTORCH - number_of_stars: 43 - description: "Temporal-Relational CrossTransformers" + number_of_stars: 5 } } papers: { @@ -27697,9 +27974,14 @@ pr_id_to_video: { url: "https://github.com/thaolmk54/hcrn-videoqa" owner: "thaolmk54" framework: FRAMEWORK_PYTORCH - number_of_stars: 90 + number_of_stars: 91 description: "Implementation for the paper \"Hierarchical Conditional Relation Networks for Video Question Answering\" (Le et al., CVPR 2020, Oral)" } + methods: { + name: "CRN" + full_name: "Conditional Relation Network" + description: "**Conditional Relation Network**, or **CRN**, is a building block to construct more sophisticated structures for representation and reasoning over video. CRN takes as input an array of tensorial objects and a conditioning feature, and computes an array of encoded output objects. Model building becomes a simple exercise of replication, rearrangement and stacking of these reusable units for diverse modalities and contextual information. This design thus supports high-order relational and multi-step reasoning." 
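The CRN entry above describes a generic unit: an array of tensorial objects plus a conditioning feature in, an array of encoded objects out, so that units can be replicated and stacked for multi-step reasoning. A minimal numpy sketch of that interface follows; the shapes, shared weights, and the mean-of-the-rest summary are illustrative assumptions, not the paper's exact design.

```python
import numpy as np

rng = np.random.default_rng(0)

def crn_unit(objects, condition, w):
    """Encode each object jointly with a summary of the others, conditioned on `condition`."""
    outputs = []
    for i, obj in enumerate(objects):
        # Relate object i to the mean of the remaining objects (a crude
        # stand-in for the subset aggregation used by the real CRN).
        rest = np.mean([o for j, o in enumerate(objects) if j != i], axis=0)
        joint = np.concatenate([obj, rest, condition])
        outputs.append(np.tanh(w @ joint))  # shared encoder weights
    return outputs

d = 8
objects = [rng.normal(size=d) for _ in range(4)]   # array of input objects
condition = rng.normal(size=d)                     # conditioning feature
w = rng.normal(size=(d, 3 * d)) / np.sqrt(3 * d)
encoded = crn_unit(objects, condition, w)
print(len(encoded), encoded[0].shape)  # 4 objects in, 4 encoded objects out
```

Because the output has the same form as the input, `crn_unit` can be applied repeatedly, which is the "replication, rearrangement and stacking" the entry refers to.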
+ } } papers: { paper_id: "interpreting-video-features-a-comparison-of-1" @@ -27748,7 +28030,7 @@ pr_id_to_video: { video_id: "c2vqEzzPML8" video_title: "PR-248: Temporal Relational Reasoning in Videos" number_of_likes: 5 - number_of_views: 596 + number_of_views: 606 published_date: { seconds: 1589732548 } @@ -27772,75 +28054,73 @@ pr_id_to_video: { authors: "Chien-Yao Wang" authors: "Hong-Yuan Mark Liao" repositories: { - url: "https://github.com/wiegehtki/zoneminder-jetson" - owner: "wiegehtki" - framework: FRAMEWORK_OTHERS - description: "Personen und Gesichtserkennung mit Zoneminder, OpenCV (GPU), YOLO, cuDNN und CUDA" - } - repositories: { - url: "https://github.com/hunter10bt/DeepLearningFinalPresentation" - owner: "hunter10bt" + url: "https://github.com/Abhi-899/YOLOV4-Custom-Object-Detection" + owner: "Abhi-899" framework: FRAMEWORK_OTHERS + description: "In this project we will train the YOLOV4 network on 3 classes 'Ambulance' , 'Car' , 'Person' with the Google open image dataset and run the detection on a real video caught on a moving traffic camera" } repositories: { - url: "https://github.com/wangermeng2021/Scaled-YOLOv4-tensorflow2" - owner: "wangermeng2021" + url: "https://github.com/Lebhoryi/keras-YOLOv3-model-set" + owner: "Lebhoryi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 24 - description: "A Tensorflow2.x implementation of Scaled-YOLOv4 as described in Scaled-YOLOv4: Scaling Cross Stage Partial Network" + number_of_stars: 1 + description: "Reposted from https://github.com/david8862/keras-YOLOv3-model-set " } repositories: { - url: "https://github.com/FelixFu520/yolov4" - owner: "FelixFu520" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RobotMobile/cv-deep-learning-paper-review" + owner: "RobotMobile" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 } repositories: { - url: "https://github.com/david8862/keras-YOLOv3-model-set" - owner: "david8862" + url: "https://github.com/otamajakusi/darknet-yolov4" + owner: "otamajakusi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 494 - description: "end-to-end YOLOv4/v3/v2 object detection pipeline, implemented on tf.keras with different technologies" } repositories: { - url: "https://github.com/MEME-Phoenix/Autonomous-Driving-Cart-MEME" - owner: "MEME-Phoenix" - framework: FRAMEWORK_PYTORCH - description: "Autonomous Driving Cart, MEME" + url: "https://github.com/ccie29441/Yolo-v4-and-Yolo-v3-v2-for-Windows-and-Linux" + owner: "ccie29441" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 } repositories: { - url: "https://github.com/Qengineering/YoloV4-ncnn-Jetson-Nano" - owner: "Qengineering" + url: "https://github.com/Dodant/ANPR-with-Yolov4" + owner: "Dodant" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "YoloV4 for Jetson Nano" + number_of_stars: 10 + description: "2020-1 CNU SW Capstone Design Project" } repositories: { - url: "https://github.com/CRIGIM/darknet" - owner: "CRIGIM" + url: "https://github.com/hhk7734/tensorflow-yolov4" + owner: "hhk7734" framework: FRAMEWORK_TENSORFLOW - description: "edited darknet" + number_of_stars: 126 + description: "YOLOv4 Implemented in Tensorflow 2."
} repositories: { - url: "https://github.com/ayoungkang/yolov4" - owner: "ayoungkang" + url: "https://github.com/weidalin/yolov4_mixup" + owner: "weidalin" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "yolov4 data augments with mixup" } repositories: { - url: "https://github.com/taeokimeng/object-detection-yolo" - owner: "taeokimeng" + url: "https://github.com/Qengineering/YoloV4-ncnn-Raspberry-Pi-64-bit" + owner: "Qengineering" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Object Detection with YOLO and Streamlit" + number_of_stars: 25 + description: "YoloV4 on a bare Raspberry Pi 4 with ncnn framework" } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + repositories: { + url: "https://github.com/RunzhaoHuang/DeepSort_YOLOV5_OnScreen" + owner: "RunzhaoHuang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } methods: { - name: "Spatial Attention Module" - full_name: "Spatial Attention Module" - description: "A **Spatial Attention Module** is a module for spatial attention in convolutional neural networks. It generates a spatial attention map by utilizing the inter-spatial relationship of features. Different from the [channel attention](https://paperswithcode.com/method/channel-attention-module), the spatial attention focuses on where is an informative part, which is complementary to the channel attention. To compute the spatial attention, we first apply average-pooling and max-pooling operations along the channel axis and concatenate them to generate an efficient feature descriptor. On the concatenated feature descriptor, we apply a convolution layer to generate a spatial attention map $\\textbf{M}\\_{s}\\left(F\\right) \\in \\mathcal{R}^{H×W}$ which encodes where to emphasize or suppress. \r\n\r\nWe aggregate channel information of a feature map by using two pooling operations, generating two 2D maps: $\\mathbf{F}^{s}\\_{avg} \\in \\mathbb{R}^{1\\times{H}\\times{W}}$ and $\\mathbf{F}^{s}\\_{max} \\in \\mathbb{R}^{1\\times{H}\\times{W}}$. Each denotes average-pooled features and max-pooled features across the channel. Those are then concatenated and convolved by a standard convolution layer, producing the 2D spatial attention map. In short, the spatial attention is computed as:\r\n\r\n$$ \\textbf{M}\\_{s}\\left(F\\right) = \\sigma\\left(f^{7x7}\\left(\\left[\\text{AvgPool}\\left(F\\right);\\text{MaxPool}\\left(F\\right)\\right]\\right)\\right) $$\r\n\r\n$$ \\textbf{M}\\_{s}\\left(F\\right) = \\sigma\\left(f^{7x7}\\left(\\left[\\mathbf{F}^{s}\\_{avg};\\mathbf{F}^{s}\\_{max} \\right]\\right)\\right) $$\r\n\r\nwhere $\\sigma$ denotes the sigmoid function and $f^{7×7}$ represents a convolution operation with the filter size of 7 × 7." 
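The spatial-attention formula quoted above (from the method entry this hunk removes) reduces to channel-wise average- and max-pooling, a 7x7 convolution over the two pooled maps, and a sigmoid. A small numpy sketch under that reading; the feature map and kernel here are random placeholders, not trained weights.

```python
import numpy as np

rng = np.random.default_rng(0)

def spatial_attention(feat, kernel):
    # feat: (C, H, W) -> pool across the channel axis to (2, H, W)
    pooled = np.stack([feat.mean(axis=0), feat.max(axis=0)])
    k = kernel.shape[-1]
    pad = k // 2
    padded = np.pad(pooled, ((0, 0), (pad, pad), (pad, pad)))
    h, w = feat.shape[1:]
    out = np.empty((h, w))
    for i in range(h):          # naive k x k convolution over the 2 pooled maps
        for j in range(w):
            out[i, j] = np.sum(padded[:, i:i + k, j:j + k] * kernel)
    return 1.0 / (1.0 + np.exp(-out))   # sigmoid -> attention map in (0, 1)

feat = rng.normal(size=(16, 8, 8))
kernel = rng.normal(size=(2, 7, 7)) * 0.1
attn = spatial_attention(feat, kernel)
print(attn.shape, bool(attn.min() > 0), bool(attn.max() < 1))  # (8, 8) True True
```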
+ name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." } methods: { name: "Bottom-up Path Augmentation" @@ -27848,19 +28128,24 @@ pr_id_to_video: { description: "**Bottom-up Path Augmentation** is a feature extraction technique that seeks to shorten the information path and enhance a feature pyramid with accurate localization signals existing in low-levels. This is based on the fact that high response to edges or instance parts is a strong indicator to accurately localize instances. \r\n\r\nEach building block takes a higher resolution feature map $N\\_{i}$ and a coarser map $P\\_{i+1}$ through lateral connection and generates the new feature map $N\\_{i+1}$ Each feature map $N\\_{i}$ first goes through a $3 \\times 3$ convolutional layer with stride $2$ to reduce the spatial size. Then each element of feature map $P\\_{i+1}$ and the down-sampled map are added through lateral connection. The fused feature map is then processed by another $3 \\times 3$ convolutional layer to generate $N\\_{i+1}$ for following sub-networks. This is an iterative process and terminates after approaching $P\\_{5}$. In these building blocks, we consistently use channel 256 of feature maps. The feature grid for each proposal is then pooled from new feature maps, i.e., {$N\\_{2}$, $N\\_{3}$, $N\\_{4}$, $N\\_{5}$}." } methods: { - name: "PAFPN" - full_name: "PAFPN" - description: "**PAFPN** is a feature pyramid module used in Path Aggregation networks ([PANet](https://paperswithcode.com/method/panet)) that combines FPNs with bottom-up path augmentation, which shortens the information path between lower layers and topmost feature." + name: "k-Means Clustering" + full_name: "k-Means Clustering" + description: "**k-Means Clustering** is a clustering algorithm that divides a training set into $k$ different clusters of examples that are near each other. It works by initializing $k$ different centroids {$\\mu\\left(1\\right),\\ldots,\\mu\\left(k\\right)$} to different values, then alternating between two steps until convergence:\r\n\r\n(i) each training example is assigned to cluster $i$ where $i$ is the index of the nearest centroid $\\mu^{(i)}$\r\n\r\n(ii) each centroid $\\mu^{(i)}$ is updated to the mean of all training examples $x^{(j)}$ assigned to cluster $i$.\r\n\r\nText Source: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [scikit-learn](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_digits.html)" + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." 
} methods: { - name: "DIoU-NMS" - full_name: "DIoU-NMS" - description: "**DIoU-NMS** is a type of non-maximum suppression where we use Distance IoU rather than regular DIoU, in which the overlap area and the distance between two central points of bounding boxes are simultaneously considered when suppressing redundant boxes.\r\n\r\nIn original NMS, the IoU metric is used to suppress the redundant detection boxes, where the overlap area is the unique factor, often yielding false suppression for the cases with occlusion. With DIoU-NMS, we not only consider the overlap area but also central point distance between two boxes." + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere where $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ account for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { name: "Weight Decay" @@ -27868,19 +28153,19 @@ description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights).
\r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" } methods: { - name: "YOLOv3" - full_name: "YOLOv3" - description: "**YOLOv3** is a real-time, single-stage object detection model that builds on [YOLOv2](https://paperswithcode.com/method/yolov2) with several improvements. Improvements include the use of a new backbone network, Darknet-53 that utilises residual connections, or in the words of the author, \"those newfangled residual network stuff\", as well as some improvements to the bounding box prediction step, and use of three different scales from which to extract features (similar to an FPN)." + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + name: "Spatial Pyramid Pooling" + full_name: "Spatial Pyramid Pooling" + description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." } methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. 
The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } } papers: { @@ -27895,10 +28180,9 @@ pr_id_to_video: { authors: "Alexey Bochkovskiy" authors: "Hong-Yuan Mark Liao" repositories: { - url: "https://github.com/CRIGIM/darknet" - owner: "CRIGIM" - framework: FRAMEWORK_TENSORFLOW - description: "edited darknet" + url: "https://github.com/youngrockoh/ScaledYOLOv4Large" + owner: "youngrockoh" + framework: FRAMEWORK_PYTORCH } repositories: { url: "https://github.com/Beaver48/kaggle-chest-xray-abnormalities" @@ -27906,54 +28190,55 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/ntcuong777/aicc-lightnet" - owner: "ntcuong777" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 + url: "https://github.com/ikuokuo/start-scaled-yolov4" + owner: "ikuokuo" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "Start Scaled YOLOv4" } repositories: { - url: "https://github.com/ash80/scaledyolov4-cpu" - owner: "ash80" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "A cpu supported variant of Scaled YOLOv4 CSP" + url: "https://github.com/agelencs/darknet_original" + owner: "agelencs" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/joker311200/yolov4-csp" - owner: "joker311200" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/wangermeng2021/ScaledYOLOv4-tensorflow2" + owner: "wangermeng2021" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 25 + description: "A Tensorflow2.x implementation of Scaled-YOLOv4 as described in Scaled-YOLOv4: Scaling Cross Stage Partial Network" } repositories: { - url: "https://github.com/leggedrobotics/darknet" - owner: "leggedrobotics" + is_official: true + url: "https://github.com/WongKinYiu/ScaledYOLOv4" + owner: "WongKinYiu" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Convolutional Neural Networks" + number_of_stars: 1501 + description: "Scaled-YOLOv4: Scaling Cross Stage Partial Network" } repositories: { - url: "https://github.com/xolbynz/scaled_yolo" - owner: "xolbynz" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/6-dl/darknet_wpb" + owner: "6-dl" + framework: FRAMEWORK_TENSORFLOW + description: "forked from AlexeyAB/darknet" } repositories: { - url: "https://github.com/gaurav67890/ScaledYOLO" - owner: "gaurav67890" + url: "https://github.com/joker311200/yolov4-csp" + owner: "joker311200" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } repositories: { - url: "https://github.com/6-dl/darknet_wpb" - owner: "6-dl" + url: "https://github.com/chy0428/Yolo_flir" + owner: "chy0428" framework: FRAMEWORK_TENSORFLOW - description: "forked from AlexeyAB/darknet" } repositories: { - url: 
"https://github.com/Ededu1984/car_detection_forza_horizon" - owner: "Ededu1984" + url: "https://github.com/AlexeyAB/darknet" + owner: "AlexeyAB" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 74 - description: "Object detection of Forza Horizon 4 gameplay video" + number_of_stars: 17018 + description: "YOLOv4 / Scaled-YOLOv4 / YOLO - Neural Networks for Object Detection (Windows and Linux version of Darknet )" } methods: { name: "YOLOv4" @@ -28032,7 +28317,7 @@ pr_id_to_video: { url: "https://github.com/PaddlePaddle/PaddleDetection" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 4334 + number_of_stars: 4484 description: "Object detection and instance segmentation toolkit based on PaddlePaddle." } methods: { @@ -28138,7 +28423,7 @@ pr_id_to_video: { url: "https://github.com/Duankaiwen/CPNDet" owner: "Duankaiwen" framework: FRAMEWORK_PYTORCH - number_of_stars: 164 + number_of_stars: 169 description: "Corner Proposal Network for Anchor-free, Two-stage Object Detection" } methods: { @@ -28164,12 +28449,10 @@ pr_id_to_video: { authors: "Bin Ren" authors: "Yanzhi Wang" repositories: { - is_official: true - url: "https://github.com/nightsnack/YOLObile" - owner: "nightsnack" + url: "https://github.com/CoCoPIE-Pruning/CoCoPIE-ModelZoo" + owner: "CoCoPIE-Pruning" framework: FRAMEWORK_PYTORCH - number_of_stars: 294 - description: "This is the implementation of YOLObile: Real-Time Object Detection on Mobile Devices via Compression-Compilation Co-Design" + number_of_stars: 105 } repositories: { url: "https://github.com/hpc203/YOLObile-opencv-dnn" @@ -28179,10 +28462,12 @@ pr_id_to_video: { description: "使用opencv的dnn模块做YOLObile的目标检测" } repositories: { - url: "https://github.com/CoCoPIE-Pruning/CoCoPIE-ModelZoo" - owner: "CoCoPIE-Pruning" + is_official: true + url: "https://github.com/nightsnack/YOLObile" + owner: "nightsnack" framework: FRAMEWORK_PYTORCH - number_of_stars: 103 + number_of_stars: 301 + description: "This is the implementation of YOLObile: Real-Time Object Detection on Mobile Devices via Compression-Compilation Co-Design" } methods: { name: "YOLOv4" @@ -28245,12 +28530,17 @@ pr_id_to_video: { } authors: "David Noever" authors: "Samantha E. Miller Noever" + methods: { + name: "RUN" + full_name: "Rung Kutta optimization" + description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers. Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely well-known in mathematics. The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. 
The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization" + } } video: { video_id: "CXRlpsFpVUE" video_title: "PR-249: YOLOv4: Optimal Speed and Accuracy of Object Detection" - number_of_likes: 60 - number_of_views: 4226 + number_of_likes: 63 + number_of_views: 4425 published_date: { seconds: 1590329152 } @@ -28414,21 +28704,21 @@ pr_id_to_video: { url: "https://github.com/PaddlePaddle/PaddleNLP/tree/develop/paddlenlp/transformers/bigbird" owner: "transformers" framework: FRAMEWORK_OTHERS - number_of_stars: 1489 + number_of_stars: 1698 description: "An NLP library with Awesome pre-trained Transformer models and easy-to-use interface, supporting wide-range of NLP tasks from research to industrial applications." } repositories: { url: "https://github.com/huggingface/transformers" owner: "huggingface" framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 + number_of_stars: 49984 description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." } repositories: { url: "https://github.com/tensorflow/models/tree/master/official/nlp/projects/bigbird" owner: "projects" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 70576 + number_of_stars: 70934 description: "Models and examples built with TensorFlow" } repositories: { @@ -28436,9 +28726,14 @@ pr_id_to_video: { url: "https://github.com/google-research/bigbird" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 279 + number_of_stars: 293 description: "Transformers for Longer Sequences" } + methods: { + name: "BigBird" + full_name: "BigBird" + description: "**BigBird** is a Transformer with a sparse attention mechanism that reduces the quadratic dependency of self-attention to linear in the number of tokens. BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model. In particular, BigBird consists of three main parts:\r\n\r\n- A set of $g$ global tokens attending on all parts of the sequence.\r\n- All tokens attending to a set of $w$ local neighboring tokens.\r\n- All tokens attending to a set of $r$ random tokens.\r\n\r\nThis leads to a high performing attention mechanism scaling to much longer sequence lengths (8x)." + } } papers: { paper_id: "an-em-approach-to-non-autoregressive" @@ -28540,7 +28835,7 @@ pr_id_to_video: { video: { video_id: "sfy6qJIRyvg" video_title: "PR-250: Are Transformers universal approximators of sequence-to-sequence functions?" 
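The BigBird entry added above combines three sparse patterns: g global tokens, a local window of w neighbours, and r random attendees per token. As a rough illustration only, the pattern can be rendered as a boolean attention mask; all sizes here are made up, not the paper's settings.

```python
import numpy as np

rng = np.random.default_rng(0)
n, g, w, r = 16, 2, 1, 2          # sequence length and pattern sizes (illustrative)
mask = np.zeros((n, n), dtype=bool)

mask[:g, :] = mask[:, :g] = True  # global tokens attend everywhere and are attended to
for i in range(n):                # sliding local window of radius w around each token
    mask[i, max(0, i - w):i + w + 1] = True
for i in range(n):                # r random attendees per query token
    mask[i, rng.choice(n, r, replace=False)] = True

print(int(mask.sum()), "of", n * n, "entries kept")  # sparse vs. quadratic full attention
```

The number of kept entries grows linearly in n for fixed g, w, r, which is the linear-cost claim in the entry.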
- number_of_views: 418 + number_of_views: 427 published_date: { seconds: 1590937118 } @@ -28651,7 +28946,7 @@ pr_id_to_video: { url: "https://github.com/dingyiming0427/goalgail" owner: "dingyiming0427" framework: FRAMEWORK_OTHERS - number_of_stars: 34 + number_of_stars: 36 description: "accompanying code for neurips submission \"Goal-conditioned Imitation Learning\" " } methods: { @@ -28682,7 +28977,7 @@ pr_id_to_video: { url: "https://github.com/kzl/decision-transformer" owner: "kzl" framework: FRAMEWORK_PYTORCH - number_of_stars: 699 + number_of_stars: 792 description: "Official codebase for Decision Transformer: Reinforcement Learning via Sequence Modeling." } methods: { @@ -28802,7 +29097,7 @@ pr_id_to_video: { video_id: "_ZY5QI9yKu4" video_title: "PR-251: Reward-Conditioned Policies" number_of_likes: 18 - number_of_views: 434 + number_of_views: 443 published_date: { seconds: 1591537660 } @@ -28824,25 +29119,33 @@ pr_id_to_video: { } authors: "Richard Zhang" repositories: { - url: "https://github.com/mauriceweiler/MobiusCNNs" - owner: "mauriceweiler" + is_official: true + url: "https://github.com/adobe/antialiased-cnns" + owner: "adobe" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "This repository implements and evaluates convolutional networks on the Möbius strip as toy model instantiations of Coordinate Independent Convolutional Networks." + number_of_stars: 1405 + description: "pip install antialiased-cnns to improve stability and accuracy" + } + repositories: { + url: "https://github.com/mnikitin/Shift-Invariant-CNNs" + owner: "mnikitin" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Gluon implementation of anti-aliased CNNs" } repositories: { url: "https://github.com/kornia/kornia/blob/e4a54c2d4fd9b8eaa88dec50061e33f7aee5d959/kornia/filters/blur_pool.py" owner: "filters" framework: FRAMEWORK_PYTORCH - number_of_stars: 4254 + number_of_stars: 4392 description: "Open Source Differentiable Computer Vision Library for PyTorch" } repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" + url: "https://github.com/tattaka/Antialiased-CNNs-Converter-PyTorch" + owner: "tattaka" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + number_of_stars: 4 + description: "Unofficial Pytorch module converter implementation of the paper Antialiased-CNNs." } repositories: { url: "https://github.com/ricky40403/BlurPool" @@ -28851,26 +29154,18 @@ pr_id_to_video: { description: "Caffe implementation of Adobe paper: \"Making Convolutional Networks Shift-Invariant Again\"" } repositories: { - url: "https://github.com/tattaka/Antialiased-CNNs-Converter-PyTorch" - owner: "tattaka" + url: "https://github.com/rwightman/pytorch-image-models" + owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "Unofficial Pytorch module converter implementation of the paper Antialiased-CNNs." 
+ number_of_stars: 12196 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } repositories: { - is_official: true - url: "https://github.com/adobe/antialiased-cnns" - owner: "adobe" + url: "https://github.com/mauriceweiler/MobiusCNNs" + owner: "mauriceweiler" framework: FRAMEWORK_PYTORCH - number_of_stars: 1398 - description: "pip install antialiased-cnns to improve stability and accuracy" - } - repositories: { - url: "https://github.com/mnikitin/Shift-Invariant-CNNs" - owner: "mnikitin" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Gluon implementation of anti-aliased CNNs" + number_of_stars: 45 + description: "This repository implements and evaluates convolutional networks on the Möbius strip as toy model instantiations of Coordinate Independent Convolutional Networks." } methods: { name: "ResNet" @@ -28938,7 +29233,7 @@ pr_id_to_video: { url: "https://github.com/achaman2/truly_shift_invariant_cnns" owner: "achaman2" framework: FRAMEWORK_PYTORCH - number_of_stars: 24 + number_of_stars: 25 } methods: { name: "Convolution" @@ -28957,11 +29252,17 @@ pr_id_to_video: { authors: "Aharon Azulay" authors: "Yair Weiss" repositories: { - url: "https://github.com/mauriceweiler/MobiusCNNs" - owner: "mauriceweiler" + url: "https://github.com/premthomas/keras-image-classification" + owner: "premthomas" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Grey-scale Image Classification using KERAS" + } + repositories: { + url: "https://github.com/amerch/CIFAR100-Training" + owner: "amerch" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "This repository implements and evaluates convolutional networks on the Möbius strip as toy model instantiations of Coordinate Independent Convolutional Networks." + number_of_stars: 2 } repositories: { is_official: true @@ -28972,17 +29273,11 @@ pr_id_to_video: { description: "Why do deep convolutional networks generalize so poorly to small image transformations? " } repositories: { - url: "https://github.com/premthomas/keras-image-classification" - owner: "premthomas" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Grey-scale Image Classification using KERAS" - } - repositories: { - url: "https://github.com/amerch/CIFAR100-Training" - owner: "amerch" + url: "https://github.com/mauriceweiler/MobiusCNNs" + owner: "mauriceweiler" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 45 + description: "This repository implements and evaluates convolutional networks on the Möbius strip as toy model instantiations of Coordinate Independent Convolutional Networks." 
} } papers: { @@ -29025,7 +29320,7 @@ pr_id_to_video: { url: "https://github.com/achaman2/truly_shift_invariant_cnns" owner: "achaman2" framework: FRAMEWORK_PYTORCH - number_of_stars: 24 + number_of_stars: 25 } } papers: { @@ -29072,8 +29367,8 @@ pr_id_to_video: { video: { video_id: "oTIBFH6M7YM" video_title: "PR-252: Making Convolutional Networks Shift-Invariant Again" - number_of_likes: 13 - number_of_views: 603 + number_of_likes: 15 + number_of_views: 629 published_date: { seconds: 1591542125 } @@ -29115,7 +29410,7 @@ pr_id_to_video: { url: "https://github.com/jbeomlee93/AdvCAM" owner: "jbeomlee93" framework: FRAMEWORK_PYTORCH - number_of_stars: 36 + number_of_stars: 38 description: "Anti-Adversarially Manipulated Attributions for Weakly and Semi-Supervised Semantic Segmentation (CVPR 2021)" } } @@ -29158,7 +29453,7 @@ pr_id_to_video: { url: "https://github.com/Juliachang/SC-CAM" owner: "Juliachang" framework: FRAMEWORK_PYTORCH - number_of_stars: 128 + number_of_stars: 134 description: "SC-CAM: Weakly-Supervised Semantic Segmentation via Sub-category Exploration (CVPR 2020)" } } @@ -29188,7 +29483,7 @@ pr_id_to_video: { url: "https://github.com/visinf/1-stage-wseg" owner: "visinf" framework: FRAMEWORK_PYTORCH - number_of_stars: 306 + number_of_stars: 309 description: "Single-Stage Semantic Segmentation from Image Labels (CVPR 2020)" } } @@ -29207,7 +29502,7 @@ pr_id_to_video: { url: "https://github.com/yaoqi-zd/SGAN" owner: "yaoqi-zd" framework: FRAMEWORK_PYTORCH - number_of_stars: 39 + number_of_stars: 38 description: "Saliency Guided Self-attention Network for Weakly and Semi-supervised Semantic Segmentation(IEEE ACCESS)" } } @@ -29243,7 +29538,7 @@ pr_id_to_video: { video_id: "okM_RGkPghw" video_title: "PR-253: FickleNet: Weakly and Semi-supervised Semantic Image Segmentation using Stochastic Inference" number_of_likes: 5 - number_of_views: 371 + number_of_views: 381 published_date: { seconds: 1592224170 } @@ -29311,7 +29606,7 @@ pr_id_to_video: { url: "https://github.com/juhongm999/dhpf" owner: "juhongm999" framework: FRAMEWORK_PYTORCH - number_of_stars: 21 + number_of_stars: 22 description: "Official PyTorch Implementation of Dynamic Hyperpixel Flow, ECCV 2020" } } @@ -29334,7 +29629,7 @@ pr_id_to_video: { url: "https://github.com/SunghwanHong/CATs" owner: "SunghwanHong" framework: FRAMEWORK_PYTORCH - number_of_stars: 42 + number_of_stars: 45 description: "Official implementation of CATs" } } @@ -29384,7 +29679,7 @@ pr_id_to_video: { url: "https://github.com/juhongm999/chm" owner: "juhongm999" framework: FRAMEWORK_PYTORCH - number_of_stars: 22 + number_of_stars: 28 description: "Official PyTorch Implementation of Convolutional Hough Matching Networks, CVPR 2021 (oral)" } } @@ -29419,7 +29714,7 @@ pr_id_to_video: { video_id: "WsPGRELtEVA" video_title: "PR-254: SFNet: Learning Object-aware Semantic Correspondence" number_of_likes: 11 - number_of_views: 443 + number_of_views: 449 published_date: { seconds: 1592753265 } @@ -29456,124 +29751,127 @@ pr_id_to_video: { url: "https://github.com/zhanghang1989/ResNeSt" owner: "zhanghang1989" framework: FRAMEWORK_PYTORCH - number_of_stars: 2818 + number_of_stars: 2849 description: "ResNeSt: Split-Attention Networks" } - repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." 
- } - repositories: { - url: "https://github.com/mohitktanwr/Deep-Stem-ResNeSt-ISPRS" - owner: "mohitktanwr" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/zhanghang1989/PyTorch-Encoding" owner: "zhanghang1989" framework: FRAMEWORK_PYTORCH - number_of_stars: 1785 + number_of_stars: 1801 description: "A CV toolkit for my papers." } repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" + url: "https://github.com/open-mmlab/mmdetection" + owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + number_of_stars: 16041 + description: "OpenMMLab Detection Toolbox and Benchmark" } repositories: { - url: "https://github.com/mohitktanwr/ResNeSt_Inverse" - owner: "mohitktanwr" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/dmlc/gluon-cv" + owner: "dmlc" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4863 + description: "Gluon CV Toolkit" } repositories: { - url: "https://github.com/ZJCV/ZCls" - owner: "ZJCV" + url: "https://github.com/zhanghang1989/detectron2-ResNeSt" + owner: "zhanghang1989" framework: FRAMEWORK_PYTORCH - number_of_stars: 86 - description: "Object Classification Training/Inferring Framework" + number_of_stars: 357 + description: "A fork of Detectron2 with ResNeSt backbone" + } + repositories: { + url: "https://github.com/YeongHyeon/ResNeSt-TF2" + owner: "YeongHyeon" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 55 + description: "TensorFlow implementation of \"ResNeSt: Split-Attention Networks\"" } repositories: { url: "https://github.com/PaddlePaddle/PaddleClas" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 2085 + number_of_stars: 2166 description: "A treasure chest for visual recognition powered by PaddlePaddle" } repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" + url: "https://github.com/sailfish009/detectron2-ResNeSt" + owner: "sailfish009" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" + number_of_stars: 1 + description: "A mirror of Detectron2 with ResNeSt backbone " } repositories: { - url: "https://github.com/osmr/imgclsmob" - owner: "osmr" - framework: FRAMEWORK_OTHERS - number_of_stars: 2233 - description: "Sandbox for training deep learning networks" + url: "https://github.com/ferna11i/detectron2_ResNeST" + owner: "ferna11i" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Custom Version of detectron2 with ResNest backbone" + } + repositories: { + url: "https://github.com/shellhue/detectron2-ResNeSt" + owner: "shellhue" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. 
It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + name: "Split Attention" + full_name: "Split Attention" + description: "A **Split Attention** block enables attention across feature-map groups. As in [ResNeXt blocks](https://paperswithcode.com/method/resnext-block), the feature can be divided into several groups, and the number of feature-map groups is given by a cardinality hyperparameter $K$. The resulting feature-map groups are called cardinal groups. Split Attention blocks introduce a new radix hyperparameter $R$ that indicates the number of splits within a cardinal group, so the total number of feature groups is $G = KR$. We may apply a series of transformations {$\\mathcal{F}\\_1, \\mathcal{F}\\_2, \\cdots\\mathcal{F}\\_G$} to each individual group, then the intermediate representation of each group is $U\\_i = \\mathcal{F}\\_i\\left(X\\right)$, for $i \\in$ {$1, 2, \\cdots{G}$}.\r\n\r\nA combined representation for each cardinal group can be obtained by fusing via an element-wise summation across multiple splits. The representation for $k$-th cardinal group is \r\n$\\hat{U}^k = \\sum_{j=R(k-1)+1}^{R k} U_j $, where $\\hat{U}^k \\in \\mathbb{R}^{H\\times W\\times C/K}$ for $k\\in{1,2,...K}$, and $H$, $W$ and $C$ are the block output feature-map sizes. \r\nGlobal contextual information with embedded channel-wise statistics can be gathered with global average pooling across spatial dimensions $s^k\\in\\mathbb{R}^{C/K}$. Here the $c$-th component is calculated as:\r\n\r\n$$\r\n s^k\\_c = \\frac{1}{H\\times W} \\sum\\_{i=1}^H\\sum\\_{j=1}^W \\hat{U}^k\\_c(i, j).\r\n$$\r\n\r\nA weighted fusion of the cardinal group representation $V^k\\in\\mathbb{R}^{H\\times W\\times C/K}$ is aggregated using channel-wise soft attention, where each feature-map channel is produced using a weighted combination over splits. The $c$-th channel is calculated as:\r\n\r\n$$\r\n V^k_c=\\sum_{i=1}^R a^k_i(c) U_{R(k-1)+i} ,\r\n$$\r\n\r\nwhere $a_i^k(c)$ denotes a (soft) assignment weight given by:\r\n\r\n$$\r\na_i^k(c) =\r\n\\begin{cases}\r\n \\frac{exp(\\mathcal{G}^c_i(s^k))}{\\sum_{j=0}^R exp(\\mathcal{G}^c_j(s^k))} & \\quad\\textrm{if } R>1, \\\\\r\n \\frac{1}{1+exp(-\\mathcal{G}^c_i(s^k))} & \\quad\\textrm{if } R=1,\\\\\r\n\\end{cases}\r\n$$\r\n\r\nand mapping $\\mathcal{G}_i^c$ determines the weight of each split for the $c$-th channel based on the global context representation $s^k$." } methods: { - name: "DeepLabv3" - full_name: "DeepLabv3" - description: "**DeepLabv3** is a semantic segmentation architecture that improves upon DeepLabv2 with several modifications. To handle the problem of segmenting objects at multiple scales, modules are designed which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, the Atrous Spatial Pyramid Pooling module from DeepLabv2 augmented with image-level features encoding global context and further boost performance. 
\r\n\r\nThe changes to the ASSP module are that the authors apply global average pooling on the last feature map of the model, feed the resulting image-level features to a 1 × 1 convolution with 256 filters (and batch normalization), and then bilinearly upsample the feature to the desired spatial dimension. In the\r\nend, the improved ASPP consists of (a) one 1×1 convolution and three 3 × 3 convolutions with rates = (6, 12, 18) when output stride = 16 (all with 256 filters and batch normalization), and (b) the image-level features.\r\n\r\nAnother interesting difference is that DenseCRF post-processing from DeepLabv2 is no longer needed." + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" } methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere where $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ account for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + name: "Cutout" + full_name: "Cutout" + description: "**Cutout** is an image augmentation and regularization technique that randomly masks out square regions of input during training, and can be used to improve the robustness and overall performance of convolutional neural networks. The main motivation for cutout comes from the problem of object occlusion, which is commonly encountered in many computer vision tasks, such as object recognition,\r\ntracking, or human pose estimation.
By generating new images which simulate occluded examples, we not only better prepare the model for encounters with occlusions in the real world, but the model also learns to take more of the image context into consideration when making decisions" } methods: { - name: "LSTM" - full_name: "Long Short-Term Memory" - description: "An **LSTM** is a type of [recurrent neural network](https://paperswithcode.com/methods/category/recurrent-neural-networks) that addresses the vanishing gradient problem in vanilla RNNs through additional cells, input and output gates. Intuitively, vanishing gradients are solved through additional *additive* components, and forget gate activations, that allow the gradients to flow through the network without vanishing as quickly.\r\n\r\n(Image Source [here](https://medium.com/datadriveninvestor/how-do-lstm-networks-solve-the-problem-of-vanishing-gradients-a6784971a577))\r\n\r\n(Introduced by Hochreiter and Schmidhuber)" + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" } methods: { - name: "Weight Decay" - full_name: "Weight Decay" - description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." 
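The Kaiming entry above gives the stability condition $\frac{1}{2}n_{l}\text{Var}[w_{l}] = 1$, i.e. weights drawn from $\mathcal{N}(0, 2/n_{l})$. A tiny numpy check that this keeps ReLU activation magnitudes roughly constant with depth; the layer width and depth are arbitrary.

```python
import numpy as np

rng = np.random.default_rng(0)

n = 512
x = rng.normal(size=n)
for _ in range(20):                         # 20 fully connected ReLU layers
    w = rng.normal(0.0, np.sqrt(2.0 / n), size=(n, n))  # He/Kaiming init
    x = np.maximum(0.0, w @ x)              # ReLU zeroes roughly half the units
# with std sqrt(2/n) the RMS magnitude neither explodes nor vanishes
print(round(float(np.sqrt(np.mean(x ** 2))), 2))  # stays O(1) after 20 layers
```

Replacing `np.sqrt(2.0 / n)` with, say, `np.sqrt(1.0 / n)` makes the printed magnitude collapse toward zero, which is the exponential shrinkage the entry warns about.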
} methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + name: "Inverted Residual Block" + full_name: "Inverted Residual Block" + description: "An **Inverted Residual Block**, sometimes called an **MBConv Block**, is a type of residual block used for image models that uses an inverted structure for efficiency reasons. It was originally proposed for the [MobileNetV2](https://paperswithcode.com/method/mobilenetv2) CNN architecture. It has since been reused for several mobile-optimized CNNs.\r\n\r\nA traditional [Residual Block](https://paperswithcode.com/method/residual-block) has a wide -> narrow -> wide structure with the number of channels. The input has a high number of channels, which are compressed with a 1x1 convolution. The number of channels is then increased again with a 1x1 convolution so input and output can be added. \r\n\r\nIn contrast, an Inverted Residual Block follows a narrow -> wide -> narrow approach, hence the inversion. We first widen with a 1x1 convolution, then use a 3x3 depthwise convolution (which greatly reduces the number of parameters), then we use a 1x1 convolution to reduce the number of channels so input and output can be added." } methods: { - name: "Deformable Convolution" - full_name: "Deformable Convolution" - description: "**Deformable convolutions** add 2D offsets to the regular grid sampling locations in the standard convolution. It enables free form deformation of the sampling grid. The offsets are learned from the preceding feature maps, via additional convolutional layers. Thus, the deformation is conditioned on the input features in a local, dense, and adaptive manner." + name: "Swish" + full_name: "Swish" + description: "**Swish** is an activation function, $f(x) = x \\cdot \\text{sigmoid}(\\beta x)$, where $\\beta$ a learnable parameter. Nearly all implementations do not use the learnable parameter $\\beta$, in which case the activation function is $x\\sigma(x)$ (\"Swish-1\").\r\n\r\nThe function $x\\sigma(x)$ is exactly the [SiLU](https://paperswithcode.com/method/silu), which was introduced by other authors before the swish.\r\nSee [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415) ([GELUs](https://paperswithcode.com/method/gelu)) where the SiLU (Sigmoid Linear Unit) was originally coined, and see [Sigmoid-Weighted Linear Units for Neural Network Function Approximation in Reinforcement Learning](https://arxiv.org/abs/1702.03118) and [Swish: a Self-Gated Activation Function](https://arxiv.org/abs/1710.05941v1) where the same activation function was experimented with later." } methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. 
To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + name: "Batch Normalization" + full_name: "Batch Normalization" + description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." } methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + name: "SGD with Momentum" + full_name: "SGD with Momentum" + description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as a ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" } methods: { - name: "Image Scale Augmentation" - full_name: "Image Scale Augmentation" - description: "Image Scale Augmentation is an augmentation technique where we randomly pick the short size of a image within a dimension range. One use case of this augmentation technique is in object detectiont asks."
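The four Batch Normalization equations above map directly to code. A minimal sketch in Go (not code from this repository), assuming a single scalar feature over a minibatch and an illustrative `batchNorm` helper:

```go
package main

import (
	"fmt"
	"math"
)

// batchNorm normalizes a minibatch of scalar activations using the four
// equations in the description above: batch mean, batch variance,
// normalization, then scale-and-shift with the learnable gamma and beta.
func batchNorm(x []float64, gamma, beta, eps float64) []float64 {
	m := float64(len(x))
	var mu float64
	for _, v := range x {
		mu += v
	}
	mu /= m
	var variance float64
	for _, v := range x {
		variance += (v - mu) * (v - mu)
	}
	variance /= m
	y := make([]float64, len(x))
	for i, v := range x {
		xHat := (v - mu) / math.Sqrt(variance+eps)
		y[i] = gamma*xHat + beta
	}
	return y
}

func main() {
	// With gamma=1 and beta=0 the outputs are zero-mean, unit-variance.
	fmt.Println(batchNorm([]float64{1, 2, 3, 4}, 1, 0, 1e-5))
}
```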
+ name: "FPN" + full_name: "Feature Pyramid Network" + description: "A **Feature Pyramid Network**, or **FPN**, is a feature extractor that takes a single-scale image of an arbitrary size as input, and outputs proportionally sized feature maps at multiple levels, in a fully convolutional fashion. This process is independent of the backbone convolutional architectures. It therefore acts as a generic solution for building feature pyramids inside deep convolutional networks to be used in tasks like object detection.\r\n\r\nThe construction of the pyramid involves a bottom-up pathway and a top-down pathway.\r\n\r\nThe bottom-up pathway is the feedforward computation of the backbone ConvNet, which computes a feature hierarchy consisting of feature maps at several scales with a scaling step of 2. For the feature\r\npyramid, one pyramid level is defined for each stage. The output of the last layer of each stage is used as a reference set of feature maps. For [ResNets](https://paperswithcode.com/method/resnet) we use the feature activations output by each stage’s last residual block. \r\n\r\nThe top-down pathway hallucinates higher resolution features by upsampling spatially coarser, but semantically stronger, feature maps from higher pyramid levels. These features are then enhanced with features from the bottom-up pathway via lateral connections. Each lateral connection merges feature maps of the same spatial size from the bottom-up pathway and the top-down pathway. The bottom-up feature map is of lower-level semantics, but its activations are more accurately localized as it was subsampled fewer times." } } papers: { paper_id: "epsanet-an-efficient-pyramid-split-attention" - title: "EPSANet: An Efficient Pyramid Split Attention Block on Convolutional Neural Network" + title: "EPSANet: An Efficient Pyramid Squeeze Attention Block on Convolutional Neural Network" arxiv_id: "2105.14447" - abstract: "Recently, it has been demonstrated that the performance of a deep convolutional neural network can be effectively improved by embedding an attention module into it. In this work, a novel lightweight and effective attention method named Pyramid Split Attention (PSA) module is proposed. By replacing the 3x3 convolution with the PSA module in the bottleneck blocks of the ResNet, a novel representational block named Efficient Pyramid Split Attention (EPSA) is obtained. The EPSA block can be easily added as a plug-and-play component into a well-established backbone network, and significant improvements on model performance can be achieved. Hence, a simple and efficient backbone architecture named EPSANet is developed in this work by stacking these ResNet-style EPSA blocks. Correspondingly, a stronger multi-scale representation ability can be offered by the proposed EPSANet for various computer vision tasks including but not limited to, image classification, object detection, instance segmentation, etc. Without bells and whistles, the performance of the proposed EPSANet outperforms most of the state-of-the-art channel attention methods. As compared to the SENet-50, the Top-1 accuracy is improved by 1.93 % on ImageNet dataset, a larger margin of +2.7 box AP for object detection and an improvement of +1.7 mask AP for instance segmentation by using the Mask-RCNN on MS-COCO dataset are obtained. Our source code is available at:https://github.com/murufeng/EPSANet." 
+ abstract: "Recently, it has been demonstrated that the performance of a deep convolutional neural network can be effectively improved by embedding an attention module into it. In this work, a novel lightweight and effective attention method named Pyramid Squeeze Attention (PSA) module is proposed. By replacing the 3x3 convolution with the PSA module in the bottleneck blocks of the ResNet, a novel representational block named Efficient Pyramid Squeeze Attention (EPSA) is obtained. The EPSA block can be easily added as a plug-and-play component into a well-established backbone network, and significant improvements on model performance can be achieved. Hence, a simple and efficient backbone architecture named EPSANet is developed in this work by stacking these ResNet-style EPSA blocks. Correspondingly, a stronger multi-scale representation ability can be offered by the proposed EPSANet for various computer vision tasks including but not limited to, image classification, object detection, instance segmentation, etc. Without bells and whistles, the performance of the proposed EPSANet outperforms most of the state-of-the-art channel attention methods. As compared to the SENet-50, the Top-1 accuracy is improved by 1.93% on ImageNet dataset, a larger margin of +2.7 box AP for object detection and an improvement of +1.7 mask AP for instance segmentation by using the Mask-RCNN on MS-COCO dataset are obtained. Our source code is available at:https://github.com/murufeng/EPSANet." published_date: { seconds: 1622332800 } @@ -29586,7 +29884,7 @@ pr_id_to_video: { url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" owner: "xmu-xiaoma666" framework: FRAMEWORK_PYTORCH - number_of_stars: 840 + number_of_stars: 1492 description: "🍀 Pytorch implementation of various Attention Mechanisms, MLP, Re-parameter, Convolution, which is helpful to further understand papers.⭐⭐⭐" } repositories: { @@ -29594,8 +29892,7 @@ pr_id_to_video: { url: "https://github.com/murufeng/EPSANet" owner: "murufeng" framework: FRAMEWORK_PYTORCH - number_of_stars: 56 - description: "EPSANet:An Efficient Pyramid Split Attention Block on Convolutional Neural Network" + number_of_stars: 67 } methods: { name: "ResNet" @@ -29668,7 +29965,7 @@ pr_id_to_video: { url: "https://github.com/mdswyz/SISN-Face-Hallucination" owner: "mdswyz" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + number_of_stars: 7 description: "An official implementation of \"Face Hallucination via Split-Attention in Split-Attention Network\" in PyTorch. 
(ACM MM 2021)" } methods: { @@ -29770,7 +30067,7 @@ pr_id_to_video: { url: "https://github.com/PaddlePaddle/PaddleClas" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 2085 + number_of_stars: 2166 description: "A treasure chest for visual recognition powered by PaddlePaddle" } methods: { @@ -29838,7 +30135,7 @@ pr_id_to_video: { url: "https://github.com/abhinavsagar/DMSANet" owner: "abhinavsagar" framework: FRAMEWORK_OTHERS - number_of_stars: 13 + number_of_stars: 20 description: "Code for the paper DMSANet: Dual Multi Scale Attention Network" } } @@ -29864,8 +30161,8 @@ pr_id_to_video: { video: { video_id: "65MLer7adGo" video_title: "PR-255: ResNeSt: Split-Attention Networks" - number_of_likes: 21 - number_of_views: 1277 + number_of_likes: 22 + number_of_views: 1306 published_date: { seconds: 1592747333 } @@ -29917,65 +30214,48 @@ pr_id_to_video: { authors: "Ilya Sutskever" authors: "Dario Amodei" repositories: { - url: "https://github.com/crazydigger/Callibration-of-GPT" - owner: "crazydigger" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/dl4nlp-tuda2021/deep-learning-for-nlp-lectures" - owner: "dl4nlp-tuda2021" + url: "https://github.com/facebookresearch/anli" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 81 - description: "Deep Learning for Natural Language Processing - Lectures 2021" + number_of_stars: 225 + description: "Adversarial Natural Language Inference Benchmark" } repositories: { - url: "https://github.com/roberttwomey/machine-imagination-workshop" - owner: "roberttwomey" + is_official: true + url: "https://github.com/openai/gpt-3" + owner: "openai" framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "text to image notebook with CLIP for workshop on Machine Imagination, Spring 2021" - } - repositories: { - url: "https://github.com/ethanjperez/true_few_shot" - owner: "ethanjperez" - framework: FRAMEWORK_PYTORCH - number_of_stars: 68 - description: "Code for the paper \"True Few-Shot Learning in Language Models\" (https://arxiv.org/abs/2105.11447)" + number_of_stars: 10551 + description: "GPT-3: Language Models are Few-Shot Learners" } repositories: { - url: "https://github.com/EleutherAI/lm_evaluation_harness" + url: "https://github.com/EleutherAI/lm-evaluation-harness" owner: "EleutherAI" framework: FRAMEWORK_OTHERS - number_of_stars: 87 + number_of_stars: 100 description: "A framework for few-shot evaluation of autoregressive language models." } repositories: { - url: "https://github.com/EleutherAI/gpt-neo" - owner: "EleutherAI" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5309 - description: "An implementation of model parallel GPT-2 and GPT-3-style models using the mesh-tensorflow library." + url: "https://github.com/hilberthit/gpt-3" + owner: "hilberthit" + framework: FRAMEWORK_OTHERS } repositories: { - url: "https://github.com/EleutherAI/lm-evaluation-harness" - owner: "EleutherAI" + url: "https://github.com/MESPA/npl" + owner: "MESPA" framework: FRAMEWORK_OTHERS - number_of_stars: 87 - description: "A framework for few-shot evaluation of autoregressive language models." 
} repositories: { - url: "https://github.com/gmum/dl-mo-2021" - owner: "gmum" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 - description: "Deep Learning with Multiple Objectives: 2021 edition" + url: "https://github.com/scrayish/ML_NLP" + owner: "scrayish" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } repositories: { url: "https://github.com/tonyzhaozh/few-shot-learning" owner: "tonyzhaozh" framework: FRAMEWORK_PYTORCH - number_of_stars: 95 + number_of_stars: 114 description: "Few-shot Learning of GPT-3" } repositories: { @@ -29985,6 +30265,19 @@ pr_id_to_video: { number_of_stars: 50 description: "Measuring Massive Multitask Language Understanding | ICLR 2021" } + repositories: { + url: "https://github.com/EleutherAI/gpt-neo" + owner: "EleutherAI" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5482 + description: "An implementation of model parallel GPT-2 and GPT-3-style models using the mesh-tensorflow library." + } + repositories: { + url: "https://github.com/crazydigger/Callibration-of-GPT" + owner: "crazydigger" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } methods: { name: "Attention Dropout" full_name: "Attention Dropout" @@ -30050,14 +30343,14 @@ pr_id_to_video: { url: "https://github.com/rrmenon10/ADAPET" owner: "rrmenon10" framework: FRAMEWORK_PYTORCH - number_of_stars: 58 + number_of_stars: 61 } repositories: { is_official: true url: "https://github.com/timoschick/fewglue" owner: "timoschick" framework: FRAMEWORK_OTHERS - number_of_stars: 118 + number_of_stars: 125 description: "This repository contains the FewGLUE dataset for few-shot natural language understanding." } repositories: { @@ -30065,7 +30358,7 @@ pr_id_to_video: { url: "https://github.com/timoschick/pet" owner: "timoschick" framework: FRAMEWORK_PYTORCH - number_of_stars: 968 + number_of_stars: 1015 description: "This repository contains the code for \"Exploiting Cloze Questions for Few-Shot Text Classification and Natural Language Inference\"" } methods: { @@ -30198,7 +30491,7 @@ pr_id_to_video: { url: "https://github.com/princeton-nlp/LM-BFF" owner: "princeton-nlp" framework: FRAMEWORK_PYTORCH - number_of_stars: 254 + number_of_stars: 293 description: "ACL'2021: LM-BFF: Better Few-shot Fine-tuning of Language Models" } methods: { @@ -30265,20 +30558,20 @@ pr_id_to_video: { authors: "Shi Feng" authors: "Dan Klein" authors: "Sameer Singh" - repositories: { - url: "https://github.com/crazydigger/Callibration-of-GPT" - owner: "crazydigger" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } repositories: { is_official: true url: "https://github.com/tonyzhaozh/few-shot-learning" owner: "tonyzhaozh" framework: FRAMEWORK_PYTORCH - number_of_stars: 95 + number_of_stars: 114 description: "Few-shot Learning of GPT-3" } + repositories: { + url: "https://github.com/crazydigger/Callibration-of-GPT" + owner: "crazydigger" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } methods: { name: "Attention Dropout" full_name: "Attention Dropout" @@ -30395,7 +30688,7 @@ pr_id_to_video: { video_id: "2uGaXv_ds-k" video_title: "PR-256: GPT-3 : Language Models are Few-Shot Learners" number_of_likes: 19 - number_of_views: 807 + number_of_views: 849 published_date: { seconds: 1592832055 } @@ -30422,7 +30715,7 @@ pr_id_to_video: { url: "https://github.com/cedricoeldorf/ConditionalStyleGAN" owner: "cedricoeldorf" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 164 + number_of_stars: 169 description: "Conditional implementation for NVIDIA's StyleGAN architecture " } methods: { @@ -30479,7 +30772,7 @@ 
pr_id_to_video: { video_id: "PP1jejaXo3s" video_title: "PR-257: LoGANv2: Conditional Style-Based Logo Generation with Generative Adversarial Networks" number_of_likes: 10 - number_of_views: 376 + number_of_views: 394 published_date: { seconds: 1593354282 } @@ -30599,7 +30892,7 @@ pr_id_to_video: { url: "https://github.com/google-research/reassessed-imagenet" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 119 + number_of_stars: 121 description: "Labels and other data for the paper \"Are we done with ImageNet?\"" } } @@ -30619,7 +30912,7 @@ pr_id_to_video: { url: "https://github.com/cgnorthcutt/cleanlab" owner: "cgnorthcutt" framework: FRAMEWORK_PYTORCH - number_of_stars: 2036 + number_of_stars: 2114 description: "The standard package for machine learning with noisy labels and finding mislabeled data. Works with most datasets and models." } repositories: { @@ -30627,7 +30920,7 @@ pr_id_to_video: { url: "https://github.com/cgnorthcutt/label-errors" owner: "cgnorthcutt" framework: FRAMEWORK_PYTORCH - number_of_stars: 30 + number_of_stars: 35 description: "🛠️ Corrected Test Sets for ImageNet, MNIST, CIFAR, Caltech-256, QuickDraw, IMDB, Amazon Reviews, 20News, and AudioSet" } } @@ -30686,7 +30979,7 @@ pr_id_to_video: { url: "https://github.com/naver-ai/relabel_imagenet" owner: "naver-ai" framework: FRAMEWORK_PYTORCH - number_of_stars: 298 + number_of_stars: 304 } methods: { name: "CutMix" @@ -30707,18 +31000,18 @@ pr_id_to_video: { authors: "Aleksander Madry" repositories: { is_official: true - url: "https://github.com/MadryLab/BREEDS-Benchmarks" + url: "https://github.com/MadryLab/robustness" owner: "MadryLab" - framework: FRAMEWORK_OTHERS - number_of_stars: 31 + framework: FRAMEWORK_PYTORCH + number_of_stars: 572 + description: "A library for experimenting with, training and evaluating neural networks, with a focus on adversarial robustness." } repositories: { is_official: true - url: "https://github.com/MadryLab/robustness" + url: "https://github.com/MadryLab/BREEDS-Benchmarks" owner: "MadryLab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 559 - description: "A library for experimenting with, training and evaluating neural networks, with a focus on adversarial robustness." + framework: FRAMEWORK_OTHERS + number_of_stars: 32 } } papers: { @@ -30734,11 +31027,11 @@ pr_id_to_video: { authors: "Eduard Hovy" authors: "Quoc V. Le" repositories: { - url: "https://github.com/mhd-medfa/NoisyStudent-Based-Object-Recognition" - owner: "mhd-medfa" + url: "https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet" + owner: "official" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "3rd place solution" + number_of_stars: 4415 + description: "Reference models and tools for Cloud TPUs." } repositories: { url: "https://github.com/stanleyjzheng/PyData" @@ -30748,37 +31041,45 @@ pr_id_to_video: { description: "Accompanying notebook and sources to \"A Guide to Pseudolabelling: How to get a Kaggle medal with only one model\" (Dec. 
2020 PyData Boston-Cambridge Keynote)" } repositories: { - url: "https://github.com/thomasly/PaperTranslation" - owner: "thomasly" + url: "https://github.com/adventure2165/Summarization_self-training_with_noisy_student_improves_imagenet_classification" + owner: "adventure2165" framework: FRAMEWORK_OTHERS - description: "Translations for collections of English papers" - } - repositories: { - url: "https://github.com/xultaeculcis/coral-net" - owner: "xultaeculcis" - framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } repositories: { - url: "https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet" - owner: "official" + is_official: true + url: "https://github.com/tensorflow/tpu" + owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 + number_of_stars: 4415 description: "Reference models and tools for Cloud TPUs." } - repositories: { - url: "https://github.com/adventure2165/Summarization_self-training_with_noisy_student_improves_imagenet_classification" - owner: "adventure2165" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - } repositories: { is_official: true url: "https://github.com/google-research/noisystudent" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 557 + number_of_stars: 578 description: "Code for Noisy Student Training. https://arxiv.org/abs/1911.04252" } + repositories: { + url: "https://github.com/mhd-medfa/NoisyStudent-Based-Object-Recognition" + owner: "mhd-medfa" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "3rd place solution" + } + repositories: { + url: "https://github.com/xultaeculcis/coral-net" + owner: "xultaeculcis" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/thomasly/PaperTranslation" + owner: "thomasly" + framework: FRAMEWORK_OTHERS + description: "Translations for collections of English papers" + } methods: { name: "RandAugment" full_name: "RandAugment" @@ -30833,8 +31134,8 @@ pr_id_to_video: { video: { video_id: "CPMgX5ikL_8" video_title: "PR-258: From ImageNet to Image Classification: Contextualizing Progress on Benchmarks" - number_of_likes: 13 - number_of_views: 1034 + number_of_likes: 14 + number_of_views: 1050 published_date: { seconds: 1593963122 } @@ -30864,7 +31165,7 @@ pr_id_to_video: { url: "https://github.com/tadorfer/NLProt" owner: "tadorfer" framework: FRAMEWORK_OTHERS - number_of_stars: 7 + number_of_stars: 8 description: "NLP for Proteins - A paper collection" } repositories: { @@ -30872,7 +31173,7 @@ pr_id_to_video: { url: "https://github.com/salesforce/provis" owner: "salesforce" framework: FRAMEWORK_PYTORCH - number_of_stars: 214 + number_of_stars: 217 description: "Official code repository of \"BERTology Meets Biology: Interpreting Attention in Protein Language Models.\"" } methods: { @@ -30942,7 +31243,7 @@ pr_id_to_video: { url: "https://github.com/googleinterns/protein-embedding-retrieval" owner: "googleinterns" framework: FRAMEWORK_OTHERS - number_of_stars: 27 + number_of_stars: 29 } } papers: { @@ -30998,7 +31299,7 @@ pr_id_to_video: { url: "https://github.com/google-research/google-research" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 + number_of_stars: 18790 description: "Google Research" } methods: { @@ -31216,66 +31517,66 @@ pr_id_to_video: { authors: "Lucy Colwell" authors: "Adrian Weller" repositories: { - url: "https://github.com/microsoft/vision-longformer" - owner: "microsoft" - framework: FRAMEWORK_PYTORCH - number_of_stars: 103 + is_official: true + url: 
"https://github.com/google-research/google-research" + owner: "google-research" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18790 + description: "Google Research" } repositories: { - url: "https://github.com/lucidrains/se3-transformer-pytorch" - owner: "lucidrains" + url: "https://github.com/idiap/fast-transformers" + owner: "idiap" framework: FRAMEWORK_PYTORCH - number_of_stars: 140 - description: "Implementation of SE3-Transformers for Equivariant Self-Attention, in Pytorch. This specific repository is geared towards integration with eventual Alphafold2 replication." + number_of_stars: 957 + description: "Pytorch library for fast transformer implementations" } repositories: { - url: "https://github.com/ShivamRajSharma/Transformer-Architectures-From-Scratch" - owner: "ShivamRajSharma" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Implementation of transformers based architecture in PyTorch. " + url: "https://github.com/teddykoker/performer" + owner: "teddykoker" + framework: FRAMEWORK_OTHERS + number_of_stars: 27 + description: "Simply Numpy implementation of the FAVOR+ attention mechanism, https://teddykoker.com/2020/11/performers/" } repositories: { - url: "https://github.com/lucidrains/performer-pytorch" - owner: "lucidrains" + url: "https://github.com/cloneofsimo/smallest_working_performer" + owner: "cloneofsimo" framework: FRAMEWORK_PYTORCH - number_of_stars: 649 - description: "An implementation of Performer, a linear attention-based transformer, in Pytorch" + number_of_stars: 9 } repositories: { url: "https://github.com/xl402/performer" owner: "xl402" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 21 + number_of_stars: 22 description: "Tensorflow implementation of a linear attention architecture" } repositories: { - url: "https://github.com/teddykoker/performer" - owner: "teddykoker" - framework: FRAMEWORK_OTHERS - number_of_stars: 25 - description: "Simply Numpy implementation of the FAVOR+ attention mechanism, https://teddykoker.com/2020/11/performers/" + url: "https://github.com/lucidrains/se3-transformer-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 146 + description: "Implementation of SE3-Transformers for Equivariant Self-Attention, in Pytorch. This specific repository is geared towards integration with eventual Alphafold2 replication." } repositories: { - url: "https://github.com/cloneofsimo/smallest_working_performer" - owner: "cloneofsimo" + url: "https://github.com/lucidrains/performer-pytorch" + owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 9 + number_of_stars: 687 + description: "An implementation of Performer, a linear attention-based transformer, in Pytorch" } repositories: { - url: "https://github.com/idiap/fast-transformers" - owner: "idiap" + url: "https://github.com/ShivamRajSharma/Transformer-Architectures-From-Scratch" + owner: "ShivamRajSharma" framework: FRAMEWORK_PYTORCH - number_of_stars: 925 - description: "Pytorch library for fast transformer implementations" + number_of_stars: 11 + description: "Implementation of transformers based architecture in PyTorch. 
" } repositories: { - is_official: true - url: "https://github.com/google-research/google-research" - owner: "google-research" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 - description: "Google Research" + url: "https://github.com/microsoft/vision-longformer" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 114 } methods: { name: "Residual Connection" @@ -31332,7 +31633,7 @@ pr_id_to_video: { video_id: "bUOEu1laC9g" video_title: "PR-259: BERTology meets Biology: Interpreting attention in protein language modeling" number_of_likes: 10 - number_of_views: 455 + number_of_views: 465 published_date: { seconds: 1593956917 } @@ -31358,116 +31659,118 @@ pr_id_to_video: { authors: "Saining Xie" authors: "Ross Girshick" repositories: { - url: "https://github.com/joshr17/IFM" - owner: "joshr17" + url: "https://github.com/HobbitLong/CMC" + owner: "HobbitLong" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "Code for paper \"Can contrastive learning avoid shortcut solutions?\"" + number_of_stars: 1059 + description: "[ECCV 2020] \"Contrastive Multiview Coding\", also contains implementations for MoCo and InstDis" } repositories: { - url: "https://github.com/beresandras/contrastive-classification-keras" - owner: "beresandras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "Implementation of self-supervised image-level contrastive pretraining methods using Keras." + url: "https://github.com/bl0/moco" + owner: "bl0" + framework: FRAMEWORK_PYTORCH + number_of_stars: 121 + description: "Unofficial implementation with pytorch DistributedDataParallel for \"MoCo: Momentum Contrast for Unsupervised Visual Representation Learning\"" } repositories: { - url: "https://github.com/amazon-research/exponential-moving-average-normalization" - owner: "amazon-research" + url: "https://github.com/HobbitLong/PyContrast" + owner: "HobbitLong" framework: FRAMEWORK_PYTORCH - number_of_stars: 19 - description: "PyTorch implementation of EMAN for self-supervised and semi-supervised learning: https://arxiv.org/abs/2101.08482" + number_of_stars: 1349 + description: "PyTorch implementation of Contrastive Learning methods; List of awesome-contrastive-learning papers" } repositories: { - url: "https://github.com/lightly-ai/lightly" - owner: "lightly-ai" + url: "https://github.com/szq0214/CMC_with_Image_Mixture" + owner: "szq0214" framework: FRAMEWORK_PYTORCH - number_of_stars: 1064 - description: "A python library for self-supervised learning on images." + number_of_stars: 16 + description: "pytorch implementation of \"Contrastive Multiview Coding\", \"Momentum Contrast for Unsupervised Visual Representation Learning\", and \"Unsupervised Feature Learning via Non-Parametric Instance-level Discrimination\"" } repositories: { - url: "https://github.com/sroikl/MoCo" - owner: "sroikl" + url: "https://github.com/YyzHarry/imbalanced-semi-self" + owner: "YyzHarry" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 474 + description: "[NeurIPS 2020] Semi-Supervision (Unlabeled Data) & Self-Supervision Improve Class-Imbalanced / Long-Tailed Learning" } repositories: { - url: "https://github.com/KevinMusgrave/pytorch_metric_learning" - owner: "KevinMusgrave" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3400 - description: "The easiest way to use deep metric learning in your application. Modular, flexible, and extensible. Written in PyTorch." 
+ url: "https://github.com/PaperCodeReview/MoCo-TF" + owner: "PaperCodeReview" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 17 + description: "TF 2.x implementation of MoCo v1 (Momentum Contrast for Unsupervised Visual Representation Learning, CVPR 2020) and MoCo v2 (Improved Baselines with Momentum Contrastive Learning, 2020)." } repositories: { - url: "https://github.com/kikacaty/adv_guide" - owner: "kikacaty" + url: "https://github.com/AidenDurrant/MoCo-Pytorch" + owner: "AidenDurrant" framework: FRAMEWORK_PYTORCH + number_of_stars: 37 + description: "An unofficial Pytorch implementation of \"Improved Baselines with Momentum Contrastive Learning\" (MoCoV2) - X. Chen, et al." } repositories: { - url: "https://github.com/KevinMusgrave/pytorch-metric-learning" - owner: "KevinMusgrave" + url: "https://github.com/joshr17/IFM" + owner: "joshr17" framework: FRAMEWORK_PYTORCH - number_of_stars: 3400 - description: "The easiest way to use deep metric learning in your application. Modular, flexible, and extensible. Written in PyTorch." + number_of_stars: 10 + description: "Code for paper \"Can contrastive learning avoid shortcut solutions?\"" } repositories: { - url: "https://github.com/facebookresearch/vissl" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1788 - description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." + is_official: true + url: "https://github.com/ppwwyyxx/moco.tensorflow" + owner: "ppwwyyxx" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 130 + description: "A TensorFlow re-implementation of Momentum Contrast (MoCo): https://arxiv.org/abs/1911.05722" } repositories: { - url: "https://github.com/ehsanik/muscleTorch" - owner: "ehsanik" + url: "https://github.com/1170500804/MoCo" + owner: "1170500804" framework: FRAMEWORK_PYTORCH - number_of_stars: 21 - description: "What Can You Learn from Your Muscles? Learning Visual Representation from Human Interactions (https://arxiv.org/pdf/2010.08539.pdf)" } methods: { - name: "MoCo" - full_name: "Momentum Contrast" - description: "**MoCo**, or **Momentum Contrast**, is a self-supervised learning algorithm with a contrastive loss. \r\n\r\nContrastive loss methods can be thought of as building dynamic dictionaries. The \"keys\" (tokens) in the dictionary are sampled from data (e.g., images or patches) and are represented by an encoder network. Unsupervised learning trains encoders to perform dictionary look-up: an encoded “query” should be similar to its matching key and dissimilar to others. Learning is formulated as minimizing a contrastive loss. \r\n\r\nMoCo can be viewed as a way to build large and consistent dictionaries for unsupervised learning with a contrastive loss. In MoCo, we maintain the dictionary as a queue of data samples: the encoded representations of the current mini-batch are enqueued, and the oldest are dequeued. The queue decouples the dictionary size from the mini-batch size, allowing it to be large. Moreover, as the dictionary keys come from the preceding several mini-batches, a slowly progressing key encoder, implemented as a momentum-based moving average of the query encoder, is proposed to maintain consistency." + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. 
It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." } methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" } methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + name: "Residual Block" + full_name: "Residual Block" + description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." 
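The Residual Block entry above is the identity $y = \mathcal{F}(x) + x$. A minimal Go sketch (illustrative only; `residual` stands in for a real layer stack with matching input/output shape):

```go
package main

import "fmt"

// residual computes y = F(x) + x, the skip-connection form from the
// Residual Block description above.
func residual(x []float64, f func([]float64) []float64) []float64 {
	fx := f(x)
	y := make([]float64, len(x))
	for i := range x {
		y[i] = fx[i] + x[i]
	}
	return y
}

func main() {
	// If F learns the zero function, the block reduces to an identity
	// mapping, which is exactly the intuition given in the description.
	zeroF := func(x []float64) []float64 { return make([]float64, len(x)) }
	fmt.Println(residual([]float64{1, 2, 3}, zeroF)) // [1 2 3]
}
```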
} methods: { - name: "Mask R-CNN" - full_name: "Mask R-CNN" - description: "**Mask R-CNN** extends [Faster R-CNN](http://paperswithcode.com/method/faster-r-cnn) to solve instance segmentation tasks. It achieves this by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. In principle, Mask R-CNN is an intuitive extension of Faster R-CNN, but constructing the mask branch properly is critical for good results. \r\n\r\nMost importantly, Faster R-CNN was not designed for pixel-to-pixel alignment between network inputs and outputs. This is evident in how [RoIPool](http://paperswithcode.com/method/roi-pooling), the *de facto* core operation for attending to instances, performs coarse spatial quantization for feature extraction. To fix the misalignment, Mask R-CNN utilises a simple, quantization-free layer, called [RoIAlign](http://paperswithcode.com/method/roi-align), that faithfully preserves exact spatial locations. \r\n\r\nSecondly, Mask R-CNN *decouples* mask and class prediction: it predicts a binary mask for each class independently, without competition among classes, and relies on the network's RoI classification branch to predict the category. In contrast, an [FCN](http://paperswithcode.com/method/fcn) usually perform per-pixel multi-class categorization, which couples segmentation and classification." + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$" } methods: { - name: "Random Grayscale" - full_name: "Random Grayscale" - description: "**Random Grayscale** is an image data augmentation that converts an image to grayscale with probability $p$." + name: "Max Pooling" + full_name: "Max Pooling" + description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" } methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)."
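The Softmax formula above, applied to a vector of logits $x^{T}w_{j}$, is a few lines of code. A minimal illustrative Go sketch (the max-subtraction is a standard numerical-stability trick and does not change the result):

```go
package main

import (
	"fmt"
	"math"
)

// softmax maps a vector of logits to probabilities, matching the
// formula in the description above.
func softmax(logits []float64) []float64 {
	maxV := logits[0]
	for _, v := range logits {
		if v > maxV {
			maxV = v
		}
	}
	out := make([]float64, len(logits))
	var sum float64
	for i, v := range logits {
		out[i] = math.Exp(v - maxV) // shifted for stability
		sum += out[i]
	}
	for i := range out {
		out[i] /= sum
	}
	return out
}

func main() {
	fmt.Println(softmax([]float64{1, 2, 3})) // ≈ [0.090 0.245 0.665], sums to 1
}
```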
+ name: "Exponential Decay" + full_name: "Exponential Decay" + description: "**Exponential Decay** is a learning rate schedule where we decay the learning rate with more iterations using an exponential function:\r\n\r\n$$ \\text{lr} = \\text{lr}\\_{0}\\exp\\left(-kt\\right) $$\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" } methods: { - name: "Residual Block" - full_name: "Residual Block" - description: "**Residual Blocks** are skip-connection blocks that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture.\r\n \r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$. The additional $x$ acts like a residual, hence the name 'residual block'.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers. Having skip connections allows the network to more easily learn identity-like mappings.\r\n\r\nNote that in practice, [Bottleneck Residual Blocks](https://paperswithcode.com/method/bottleneck-residual-block) are used for deeper ResNets, such as ResNet-50 and ResNet-101, as these bottleneck blocks are less computationally intensive." + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "InfoNCE" - full_name: "InfoNCE" - description: "**InfoNCE**, where NCE stands for Noise-Contrastive Estimation, is a type of contrastive loss function used for [self-supervised learning](https://paperswithcode.com/methods/category/self-supervised-learning).\r\n\r\nGiven a set $X = ${$x\\_{1}, \\dots, x\\_{N}$} of $N$ random samples containing one positive sample from $p\\left(x\\_{t+k}|c\\_{t}\\right)$ and $N − 1$ negative samples from the 'proposal' distribution $p\\left(x\\_{t+k}\\right)$, we optimize:\r\n\r\n$$ \\mathcal{L}\\_{N} = - \\mathbb{E}\\_{X}\\left[\\log\\frac{f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right)}{\\sum\\_{x\\_{j}\\in{X}}f\\_{k}\\left(x\\_{j}, c\\_{t}\\right)}\\right] $$\r\n\r\nOptimizing this loss will result in $f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right)$ estimating the density ratio, which is:\r\n\r\n$$ f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right) \\propto \\frac{p\\left(x\\_{t+k}|c\\_{t}\\right)}{p\\left(x\\_{t+k}\\right)} $$" + name: "Random Grayscale" + full_name: "Random Grayscale" + description: "**Random Grayscale** is an image data augmentation that converts an image to grayscale with probability $p$." 
} methods: { - name: "Exponential Decay" - full_name: "Exponential Decay" - description: "**Exponential Decay** is a learning rate schedule where we decay the learning rate with more iterations using an exponential function:\r\n\r\n$$ \\text{lr} = \\text{lr}\\_{0}\\exp\\left(-kt\\right) $$\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" } methods: { name: "Kaiming Initialization" @@ -31505,7 +31808,7 @@ pr_id_to_video: { url: "https://github.com/dddzg/unimoco" owner: "dddzg" framework: FRAMEWORK_PYTORCH - number_of_stars: 28 + number_of_stars: 29 description: "UniMoCo: Unsupervised, Semi-Supervised and Full-Supervised Visual Representation Learning" } methods: { @@ -31665,15 +31968,15 @@ pr_id_to_video: { url: "https://github.com/maple-research-lab/AdCo" owner: "maple-research-lab" framework: FRAMEWORK_PYTORCH - number_of_stars: 101 + number_of_stars: 112 description: "AdCo: Adversarial Contrast for Efficient Learning of Unsupervised Representations from Self-Trained Negative Adversaries" } } video: { video_id: "2Undxq7jlsA" video_title: "PR-260: Momentum Contrast for Unsupervised Visual Representation Learning" - number_of_likes: 37 - number_of_views: 2554 + number_of_likes: 40 + number_of_views: 2686 published_date: { seconds: 1594561765 } @@ -31704,7 +32007,7 @@ pr_id_to_video: { url: "https://github.com/mtoneva/example_forgetting" owner: "mtoneva" framework: FRAMEWORK_PYTORCH - number_of_stars: 98 + number_of_stars: 101 } } papers: { @@ -31817,7 +32120,7 @@ pr_id_to_video: { url: "https://github.com/goodfeli/forgetting" owner: "goodfeli" framework: FRAMEWORK_OTHERS - number_of_stars: 48 + number_of_stars: 49 description: "Repository of code for the experiments for the ICLR submission \"An Empirical Investigation of Catastrophic Forgetting in Gradient-Based Networks\"" } } @@ -31838,7 +32141,7 @@ pr_id_to_video: { url: "https://github.com/nabk89/NAS-with-Proxy-data" owner: "nabk89" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + number_of_stars: 5 description: "Official code of \"NAS acceleration via proxy data\", IJCAI21" } methods: { @@ -31889,8 +32192,8 @@ pr_id_to_video: { video: { video_id: "69VE4WxUlUI" video_title: "PR-261: Empirical Study of Forgetting Events during Deep Neural Network Learning" - number_of_likes: 8 - number_of_views: 454 + number_of_likes: 9 + number_of_views: 459 published_date: { seconds: 1594565691 } @@ -31917,7 +32220,7 @@ pr_id_to_video: { url: "https://github.com/ilovepose/fast-human-pose-estimation.pytorch" owner: "ilovepose" framework: FRAMEWORK_PYTORCH - number_of_stars: 308 + number_of_stars: 310 description: "Official pytorch Code for CVPR2019 paper \"Fast Human Pose Estimation\" https://arxiv.org/abs/1811.05419" } } @@ -31934,65 +32237,72 @@ pr_id_to_video: { authors: "Dong Liu" authors: "Jingdong Wang" repositories: { - url: 
"https://github.com/leeyegy/SimDR" - owner: "leeyegy" + url: "https://github.com/HRNet/HRNet-Facial-Landmark-Detection" + owner: "HRNet" framework: FRAMEWORK_PYTORCH - number_of_stars: 36 - description: "PyTorch implementation for: Is 2D Heatmap Representation Even Necessary for Human Pose Estimation? (http://arxiv.org/abs/2107.03332)" + number_of_stars: 715 + description: "This is an official implementation of facial landmark detection for our TPAMI paper \"Deep High-Resolution Representation Learning for Visual Recognition\". https://arxiv.org/abs/1908.07919" } repositories: { - url: "https://github.com/ducongju/HRNet" - owner: "ducongju" + url: "https://github.com/laowang666888/HRNET" + owner: "laowang666888" framework: FRAMEWORK_PYTORCH - description: "The project is an official implementation of our CVPR2019 paper \"Deep High-Resolution Representation Learning for Human Pose Estimation\"" } repositories: { - url: "https://github.com/HRNet/HRNet-Human-Pose-Estimation" + url: "https://github.com/HRNet/HRNet-Image-Classification" owner: "HRNet" framework: FRAMEWORK_PYTORCH - number_of_stars: 73 - description: "This repo is copied from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch" + number_of_stars: 694 + description: "Train the HRNet model on ImageNet" } repositories: { - url: "https://github.com/goutern/PoseEstimation" - owner: "goutern" + url: "https://github.com/CASIA-IVA-Lab/ISP-reID" + owner: "CASIA-IVA-Lab" framework: FRAMEWORK_PYTORCH - description: "Pose estimation models" + number_of_stars: 56 + description: "ISP-reID" } repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" + url: "https://github.com/chuanqichen/deepcoaching" + owner: "chuanqichen" framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." + number_of_stars: 3 + description: "Sports Coaching from Pose Estimation" } repositories: { - url: "https://github.com/d-shivam/Pose-estimation-based-action-recognition-for-help-Situation-Identification" - owner: "d-shivam" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "The project is a modified implementation of CVPR2019 paper \"Deep High-Resolution Representation Learning for Human Pose Estimation\". It has been changed to train the model on standard dataset as well as drone captured dataset to help in search and rescue operation in Natural Disaster like situation." + url: "https://github.com/mks0601/PoseFix_RELEASE" + owner: "mks0601" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 305 + description: "Official TensorFlow implementation of \"PoseFix: Model-agnostic General Human Pose Refinement Network\", CVPR 2019" } repositories: { - url: "https://github.com/abhi1kumar/hrnet_pose_single_gpu" - owner: "abhi1kumar" + url: "https://github.com/HRNet/HRNet-Semantic-Segmentation" + owner: "HRNet" framework: FRAMEWORK_PYTORCH - description: "Forked from original HR-Net Pose but made to run to single GPU" + number_of_stars: 2182 + description: "The OCR approach is rephrased as Segmentation Transformer: https://arxiv.org/abs/1909.11065. This is an official implementation of semantic segmentation for HRNet. 
https://arxiv.org/abs/1908.07919" } repositories: { - url: "https://github.com/visionNoob/hrnet_pytorch" - owner: "visionNoob" + url: "https://github.com/HRNet/HRNet-Object-Detection" + owner: "HRNet" framework: FRAMEWORK_PYTORCH + number_of_stars: 563 + description: "Object detection with multi-level representations generated from deep high-resolution representation learning (HRNetV2h). This is an official implementation for our TPAMI paper \"Deep High-Resolution Representation Learning for Visual Recognition\". https://arxiv.org/abs/1908.07919" } repositories: { - url: "https://github.com/ken724049/action-recognition" - owner: "ken724049" - framework: FRAMEWORK_OTHERS + url: "https://github.com/HRNet/HRNet-Human-Pose-Estimation" + owner: "HRNet" + framework: FRAMEWORK_PYTORCH + number_of_stars: 81 + description: "This repo is copied from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch" } repositories: { - url: "https://github.com/wsjzha/deep-high-resolution-net.pytorch" - owner: "wsjzha" + url: "https://github.com/HRNet/HRNet-MaskRCNN-Benchmark" + owner: "HRNet" framework: FRAMEWORK_PYTORCH + number_of_stars: 123 + description: "Object detection with multi-level representations generated from deep high-resolution representation learning (HRNetV2h)." } methods: { name: "Heatmap" @@ -32011,10 +32321,11 @@ pr_id_to_video: { authors: "Lorenzo Bertoni" authors: "Alexandre Alahi" repositories: { - url: "https://github.com/thanhtrung98/Pose_estimation" - owner: "thanhtrung98" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 + url: "https://github.com/vita-epfl/openpifpafwebdemo" + owner: "vita-epfl" + framework: FRAMEWORK_PYTORCH + number_of_stars: 81 + description: "Web browser based demo of OpenPifPaf." } repositories: { url: "https://github.com/thanhtrung98/human_pose_estimation" @@ -32023,11 +32334,10 @@ pr_id_to_video: { number_of_stars: 1 } repositories: { - url: "https://github.com/vita-epfl/openpifpafwebdemo" - owner: "vita-epfl" - framework: FRAMEWORK_PYTORCH - number_of_stars: 81 - description: "Web browser based demo of OpenPifPaf." + url: "https://github.com/thanhtrung98/Pose_estimation" + owner: "thanhtrung98" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } methods: { name: "Composite Fields" @@ -32052,7 +32362,7 @@ pr_id_to_video: { url: "https://github.com/hustvl/EfficientPose" owner: "hustvl" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 7 } methods: { name: "ReLU" @@ -32092,19 +32402,6 @@ pr_id_to_video: { authors: "Christoph Feichtenhofer" authors: "David Grangier" authors: "Michael Auli" - repositories: { - url: "https://github.com/sjtuxcx/ITES" - owner: "sjtuxcx" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - } - repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." 
- } repositories: { url: "https://github.com/philipNoonan/OPVP3D" owner: "philipNoonan" @@ -32116,7 +32413,7 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/VideoPose3D" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 2438 + number_of_stars: 2510 description: "Efficient 3D human pose estimation in video using 2D keypoint trajectories" } repositories: { @@ -32129,7 +32426,7 @@ pr_id_to_video: { url: "https://github.com/garyzhao/SemGCN" owner: "garyzhao" framework: FRAMEWORK_PYTORCH - number_of_stars: 315 + number_of_stars: 319 description: "The Pytorch implementation for \"Semantic Graph Convolutional Networks for 3D Human Pose Regression\" (CVPR 2019)." } repositories: { @@ -32138,6 +32435,19 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH number_of_stars: 23 } + repositories: { + url: "https://github.com/open-mmlab/mmpose" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1081 + description: "OpenMMLab Pose Estimation Toolbox and Benchmark." + } + repositories: { + url: "https://github.com/sjtuxcx/ITES" + owner: "sjtuxcx" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + } } papers: { paper_id: "generating-multiple-hypotheses-for-3d-human" @@ -32149,20 +32459,20 @@ pr_id_to_video: { } authors: "Chen Li" authors: "Gim Hee Lee" - repositories: { - url: "https://github.com/vnmr/JointVideoPose3D" - owner: "vnmr" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } repositories: { is_official: true url: "https://github.com/chaneyddtt/Generating-Multiple-Hypotheses-for-3D-Human-Pose-Estimation-with-Mixture-Density-Network" owner: "chaneyddtt" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 80 + number_of_stars: 81 description: "Code for our CVPR2019 paper: Generating Multiple Hypotheses for 3D Human Pose Estimation with Mixture Density Network" } + repositories: { + url: "https://github.com/vnmr/JointVideoPose3D" + owner: "vnmr" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } } papers: { paper_id: "lightweight-3d-human-pose-estimation-network" @@ -32207,7 +32517,7 @@ pr_id_to_video: { url: "https://github.com/bastianwandt/RepNet" owner: "bastianwandt" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 56 + number_of_stars: 58 description: "This is the original RepNet implementation" } } @@ -32227,15 +32537,15 @@ pr_id_to_video: { url: "https://github.com/deepmind/Temporal-3D-Pose-Kinetics" owner: "deepmind" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 190 + number_of_stars: 192 description: "Exploiting temporal context for 3D human pose estimation in the wild: 3D poses for the Kinetics dataset" } } video: { video_id: "KFL4-md3Nyg" video_title: "PR-262: Fast Human Pose Estimation (CVPR 2019)" - number_of_likes: 16 - number_of_views: 1105 + number_of_likes: 19 + number_of_views: 1123 published_date: { seconds: 1595168353 } @@ -32259,13 +32569,6 @@ pr_id_to_video: { authors: "Michael Fauser" authors: "David Sattlegger" authors: "Carsten Steger" - repositories: { - url: "https://github.com/TaikiInoue/STAD" - owner: "TaikiInoue" - framework: FRAMEWORK_PYTORCH - number_of_stars: 39 - description: "Uninformed Students: Student-Teacher Anomaly Detection with Discriminative Latent Embeddings" - } repositories: { url: "https://github.com/LuyaooChen/uninformed-students-pytorch" owner: "LuyaooChen" @@ -32277,9 +32580,16 @@ pr_id_to_video: { url: "https://github.com/denguir/student-teacher-anomaly-detection" owner: "denguir" framework: FRAMEWORK_PYTORCH - number_of_stars: 67 + number_of_stars: 72 description: 
"Student–Teacher Anomaly Detection with Discriminative Latent Embeddings" } + repositories: { + url: "https://github.com/TaikiInoue/STAD" + owner: "TaikiInoue" + framework: FRAMEWORK_PYTORCH + number_of_stars: 46 + description: "Uninformed Students: Student-Teacher Anomaly Detection with Discriminative Latent Embeddings" + } } papers: { paper_id: "inverse-transform-autoencoder-for-anomaly" @@ -32326,7 +32636,7 @@ pr_id_to_video: { url: "https://github.com/fabiocarrara/cbigan-ad" owner: "fabiocarrara" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 11 + number_of_stars: 12 description: "Code to reproduce 'Combining GANs and AutoEncodersfor efficient anomaly detection'" } methods: { @@ -32353,7 +32663,7 @@ pr_id_to_video: { url: "https://github.com/pankajmishra000/VT-ADL" owner: "pankajmishra000" framework: FRAMEWORK_OTHERS - number_of_stars: 6 + number_of_stars: 8 description: "A Vision Transformer Network for Image Anomaly Detection and Localization" } } @@ -32394,56 +32704,48 @@ pr_id_to_video: { url: "https://github.com/rvorias/ind_knn_ad" owner: "rvorias" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "Industrial knn-based anomaly detection for images" - } - repositories: { - url: "https://github.com/remmarp/PaDiM-TF" - owner: "remmarp" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "[TF 2.x] PaDiM - unofficial tensorflow implementation of the paper 'a Patch Distribution Modeling Framework for Anomaly Detection and Localization'." - } - repositories: { - url: "https://github.com/Pangoraw/PaDiM" - owner: "Pangoraw" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Unofficial re-implementation of PaDiM: A Patch Distribution Modeling Framework for Anomaly Detection and Localization" + number_of_stars: 12 + description: "Industrial knn-based anomaly detection for images. Visit streamlit link to check out the demo." } repositories: { url: "https://github.com/taikiinoue45/PaDiM" owner: "taikiinoue45" framework: FRAMEWORK_PYTORCH - number_of_stars: 16 + number_of_stars: 18 description: "PaDiM: a Patch Distribution Modeling Framework for Anomaly Detection and Localization" } repositories: { url: "https://github.com/xiahaifeng1995/PaDiM-Anomaly-Detection-Localization-master" owner: "xiahaifeng1995" framework: FRAMEWORK_PYTORCH - number_of_stars: 144 + number_of_stars: 160 description: "This is an unofficial implementation of the paper “PaDiM: a Patch Distribution Modeling Framework for Anomaly Detection and Localization”." } - methods: { - name: "ResNet" - full_name: "Residual Network" - description: "**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Instead of hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. They stack [residual blocks](https://paperswithcode.com/method/residual-block) ontop of each other to form network: e.g. a ResNet-50 has fifty layers using these blocks. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}(x)$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}(x):=\\mathcal{H}(x)-x$. The original mapping is recast into $\\mathcal{F}(x)+x$.\r\n\r\nThere is empirical evidence that these types of network are easier to optimize, and can gain accuracy from considerably increased depth." 
+ repositories: { + url: "https://github.com/Pangoraw/PaDiM" + owner: "Pangoraw" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "Unofficial re-implementation of PaDiM: A Patch Distribution Modeling Framework for Anomaly Detection and Localization" } - methods: { - name: "Wide Residual Block" - full_name: "Wide Residual Block" - description: "A **Wide Residual Block** is a type of residual block that utilises two conv 3x3 layers (with dropout). This is wider than other variants of residual blocks (for instance [bottleneck residual blocks](https://paperswithcode.com/method/bottleneck-residual-block)). It was proposed as part of the [WideResNet](https://paperswithcode.com/method/wideresnet) CNN architecture." + repositories: { + url: "https://github.com/remmarp/PaDiM-TF" + owner: "remmarp" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 5 + description: "[TF 2.x] PaDiM - unofficial tensorflow implementation of the paper 'a Patch Distribution Modeling Framework for Anomaly Detection and Localization'." } - methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + repositories: { + url: "https://github.com/ingbeeedd/PaDiM-EfficientNet" + owner: "ingbeeedd" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "PaDiM based EfficientNet and improve the inference time" } methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
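
The `Residual Connection` method entry added above describes the $\mathcal{F}(x) + x$ skip formulation. As a minimal illustrative sketch (not part of the database; assumes PyTorch, and `ResidualBlock` is a hypothetical name), the connection is just an elementwise add between a block's output and its input:

```python
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Minimal residual connection: y = F(x) + x, so the stacked layers fit the residual F."""

    def __init__(self, channels: int):
        super().__init__()
        # F(x): two shape-preserving 3x3 convolutions
        self.body = nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.body(x) + x  # identity shortcut: pushing F toward zero recovers x
```
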
} methods: { name: "Global Average Pooling" @@ -32451,29 +32753,44 @@ pr_id_to_video: { description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." } methods: { - name: "EfficientNet" - full_name: "EfficientNet" - description: "**EfficientNet** is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a *compound coefficient*. Unlike conventional practice that arbitrary scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients. For example, if we want to use $2^N$ times more computational resources, then we can simply increase the network depth by $\\alpha ^ N$, width by $\\beta ^ N$, and image size by $\\gamma ^ N$, where $\\alpha, \\beta, \\gamma$ are constant coefficients determined by a small grid search on the original small model. EfficientNet uses a compound coefficient $\\phi$ to uniformly scales network width, depth, and resolution in a principled way.\r\n\r\nThe compound scaling method is justified by the intuition that if the input image is bigger, then the network needs more layers to increase the receptive field and more channels to capture more fine-grained patterns on the bigger image.\r\n\r\nThe base EfficientNet-B0 network is based on the inverted bottleneck residual blocks of [MobileNetV2](https://paperswithcode.com/method/mobilenetv2), in addition to squeeze-and-excitation blocks.\r\n\r\n EfficientNets also transfer well and achieve state-of-the-art accuracy on CIFAR-100 (91.7%), Flowers (98.8%), and 3 other transfer learning datasets, with an order of magnitude fewer parameters." + name: "Inverted Residual Block" + full_name: "Inverted Residual Block" + description: "An **Inverted Residual Block**, sometimes called an **MBConv Block**, is a type of residual block used for image models that uses an inverted structure for efficiency reasons. It was originally proposed for the [MobileNetV2](https://paperswithcode.com/method/mobilenetv2) CNN architecture. It has since been reused for several mobile-optimized CNNs.\r\n\r\nA traditional [Residual Block](https://paperswithcode.com/method/residual-block) has a wide -> narrow -> wide structure with the number of channels. The input has a high number of channels, which are compressed with a 1x1 convolution. The number of channels is then increased again with a 1x1 convolution so input and output can be added. \r\n\r\nIn contrast, an Inverted Residual Block follows a narrow -> wide -> narrow approach, hence the inversion. 
We first widen with a 1x1 convolution, then use a 3x3 depthwise convolution (which greatly reduces the number of parameters), then we use a 1x1 convolution to reduce the number of channels so input and output can be added." } methods: { - name: "RMSProp" - full_name: "RMSProp" - description: "**RMSProp** is an unpublished adaptive learning rate optimizer [proposed by Geoff Hinton](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). The motivation is that the magnitude of gradients can differ for different weights, and can change during learning, making it hard to choose a single global learning rate. RMSProp tackles this by keeping a moving average of the squared gradient and adjusting the weight updates by this magnitude. The gradient updates are performed as:\r\n\r\n$$E\\left[g^{2}\\right]\\_{t} = \\gamma E\\left[g^{2}\\right]\\_{t-1} + \\left(1 - \\gamma\\right) g^{2}\\_{t}$$\r\n\r\n$$\\theta\\_{t+1} = \\theta\\_{t} - \\frac{\\eta}{\\sqrt{E\\left[g^{2}\\right]\\_{t} + \\epsilon}}g\\_{t}$$\r\n\r\nHinton suggests $\\gamma=0.9$, with a good default for $\\eta$ as $0.001$.\r\n\r\nImage: [Alec Radford](https://twitter.com/alecrad)" + name: "WideResNet" + full_name: "WideResNet" + description: "**Wide Residual Networks** are a variant on [ResNets](https://paperswithcode.com/method/resnet) where we decrease depth and increase the width of residual networks. This is achieved through the use of wide residual blocks." + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { name: "Depthwise Separable Convolution" full_name: "Depthwise Separable Convolution" description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
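
The `Inverted Residual Block` description that just concluded (narrow -> wide -> narrow, via 1x1 expand, 3x3 depthwise, 1x1 project) maps to a short sketch; this is my illustration under the same assumptions as above (PyTorch; `InvertedResidual` and the expansion factor `t` are hypothetical names), not code from any listed repository:

```python
import torch.nn as nn

class InvertedResidual(nn.Module):
    """Narrow -> wide -> narrow MBConv-style block with expansion factor t."""

    def __init__(self, channels: int, t: int = 6):
        super().__init__()
        hidden = channels * t
        self.block = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),   # 1x1 widen
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden, hidden, kernel_size=3, padding=1,
                      groups=hidden, bias=False),                     # 3x3 depthwise
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),   # 1x1 narrow
        )

    def forward(self, x):
        return x + self.block(x)  # equal input/output widths allow the residual add
```

Keeping the block's input and output widths equal is what permits the residual add at the end, mirroring the `Residual Connection` entry above.
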
+ } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } methods: { name: "Depthwise Convolution" full_name: "Depthwise Convolution" description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } } papers: { @@ -32549,14 +32866,14 @@ pr_id_to_video: { url: "https://github.com/xiahaifeng1995/STPM-Anomaly-Detection-Localization-master" owner: "xiahaifeng1995" framework: FRAMEWORK_PYTORCH - number_of_stars: 14 + number_of_stars: 16 description: "This is an unofficial implementation of the paper “Student-Teacher Feature Pyramid Matching for Unsupervised Anomaly Detection”." 
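
The `Depthwise Convolution` and `Pointwise Convolution` entries added above combine into the depthwise separable convolution also described in this hunk. A hedged sketch (my example, assuming PyTorch; `depthwise_separable_conv` is a hypothetical name):

```python
import torch.nn as nn

def depthwise_separable_conv(in_ch: int, out_ch: int) -> nn.Sequential:
    """A standard conv factored into a per-channel (depthwise) step and a 1x1 (pointwise) mix."""
    return nn.Sequential(
        # depthwise: groups=in_ch applies one 3x3 filter per input channel
        nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1, groups=in_ch),
        # pointwise: 1x1 conv linearly mixes channels to the desired depth
        nn.Conv2d(in_ch, out_ch, kernel_size=1),
    )
```

Factoring this way drops the weight count from roughly $9 \cdot C_{in} \cdot C_{out}$ for a full 3x3 convolution to $9 \cdot C_{in} + C_{in} \cdot C_{out}$, which is where mobile-oriented architectures get their efficiency.
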
} repositories: { url: "https://github.com/hcw-00/STPM_anomaly_detection" owner: "hcw-00" framework: FRAMEWORK_PYTORCH - number_of_stars: 22 + number_of_stars: 26 description: "Unofficial pytorch implementation of Student-Teacher Feature Pyramid Matching for Unsupervised Anomaly Detection" } } @@ -32564,7 +32881,7 @@ pr_id_to_video: { video_id: "7skd4bdE_jg" video_title: "PR-263: MVTec AD-A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection" number_of_likes: 14 - number_of_views: 796 + number_of_views: 834 published_date: { seconds: 1595169052 } @@ -32691,7 +33008,7 @@ pr_id_to_video: { url: "https://github.com/som-shahlab/trove" owner: "som-shahlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 28 + number_of_stars: 31 description: "Weakly supervised medical named entity classification" } methods: { @@ -32748,7 +33065,7 @@ pr_id_to_video: { video: { video_id: "0_2QGyWDkdE" video_title: "PR-264: Medical Device Surveillance with Electronic Health Records" - number_of_views: 178 + number_of_views: 180 published_date: { seconds: 1596382758 } @@ -32775,7 +33092,7 @@ pr_id_to_video: { url: "https://github.com/cnguyen10/few_shot_meta_learning" owner: "cnguyen10" framework: FRAMEWORK_PYTORCH - number_of_stars: 93 + number_of_stars: 104 description: "Implementations of many meta-learning algorithms to solve the few-shot learning problem in Pytorch" } } @@ -32804,7 +33121,7 @@ pr_id_to_video: { url: "https://github.com/jaesik817/bmaml" owner: "jaesik817" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 38 + number_of_stars: 39 description: "This repository contains implementations of the paper, Bayesian Model-Agnostic Meta-Learning." } } @@ -32840,16 +33157,6 @@ pr_id_to_video: { authors: "Deirdre Quillen" authors: "Chelsea Finn" authors: "Sergey Levine" - repositories: { - url: "https://github.com/waterhorse1/Pearl_relabel" - owner: "waterhorse1" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/lujiayou123/Off-Policy-Meta-Reinforcement-Learning-via-Unsupervised-Domain-Translation" - owner: "lujiayou123" - framework: FRAMEWORK_OTHERS - } repositories: { url: "https://github.com/victorchan314/cs287_final_project" owner: "victorchan314" @@ -32860,9 +33167,19 @@ pr_id_to_video: { url: "https://github.com/katerakelly/oyster" owner: "katerakelly" framework: FRAMEWORK_PYTORCH - number_of_stars: 320 + number_of_stars: 321 description: "Implementation of Efficient Off-policy Meta-learning via Probabilistic Context Variables (PEARL)" } + repositories: { + url: "https://github.com/lujiayou123/Off-Policy-Meta-Reinforcement-Learning-via-Unsupervised-Domain-Translation" + owner: "lujiayou123" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/waterhorse1/Pearl_relabel" + owner: "waterhorse1" + framework: FRAMEWORK_OTHERS + } } papers: { paper_id: "uncertainty-in-model-agnostic-meta-learning" @@ -32880,7 +33197,7 @@ pr_id_to_video: { url: "https://github.com/cnguyen10/few_shot_meta_learning" owner: "cnguyen10" framework: FRAMEWORK_PYTORCH - number_of_stars: 93 + number_of_stars: 104 description: "Implementations of many meta-learning algorithms to solve the few-shot learning problem in Pytorch" } } @@ -32924,8 +33241,8 @@ pr_id_to_video: { video: { video_id: "EVomYa9HWd8" video_title: "PR-265: Probabilistic Model-Agnostic Meta-Learning" - number_of_likes: 16 - number_of_views: 673 + number_of_likes: 21 + number_of_views: 726 published_date: { seconds: 1596868115 } @@ -32960,7 +33277,7 @@ pr_id_to_video: { url: 
"https://github.com/lliuz/ARFlow" owner: "lliuz" framework: FRAMEWORK_PYTORCH - number_of_stars: 152 + number_of_stars: 155 description: "The official PyTorch implementation of the paper \"Learning by Analogy: Reliable Supervision from Transformations for Unsupervised Optical Flow Estimation\"." } } @@ -32975,13 +33292,6 @@ pr_id_to_video: { authors: "Hengli Wang" authors: "Rui Fan" authors: "Ming Liu" - repositories: { - url: "https://github.com/twhui/LiteFlowNet3" - owner: "twhui" - framework: FRAMEWORK_OTHERS - number_of_stars: 132 - description: "LiteFlowNet3: Resolving Correspondence Ambiguity for More Accurate Optical Flow Estimation, ECCV 2020" - } repositories: { is_official: true url: "https://github.com/hlwang1124/CoT-AMFlow" @@ -33021,7 +33331,7 @@ pr_id_to_video: { url: "https://github.com/PruneTruong/DenseMatching" owner: "PruneTruong" framework: FRAMEWORK_PYTORCH - number_of_stars: 82 + number_of_stars: 136 description: "Dense matching library based on PyTorch" } } @@ -33055,7 +33365,7 @@ pr_id_to_video: { url: "https://github.com/jytime/DICL-Flow" owner: "jytime" framework: FRAMEWORK_PYTORCH - number_of_stars: 134 + number_of_stars: 137 } } papers: { @@ -33092,15 +33402,15 @@ pr_id_to_video: { url: "https://github.com/zhangzjn/DTVNet" owner: "zhangzjn" framework: FRAMEWORK_PYTORCH - number_of_stars: 34 + number_of_stars: 35 description: "DTVNet: Dynamic Time-lapse Video Generation via Single Still Image, ECCV'20 Spotlight." } } video: { video_id: "F3ZMMqWKAwk" video_title: "PR-266: Learning by Analogy: Reliable Supervision From Transformations for Unsupervised O.F.E" - number_of_likes: 3 - number_of_views: 315 + number_of_likes: 5 + number_of_views: 326 published_date: { seconds: 1597069164 } @@ -33198,7 +33508,7 @@ pr_id_to_video: { video_id: "WuYXSv7nuPw" video_title: "PR-267: MultiCAM:Multiple class activation mapping for aircraft recognition in remote sensing images" number_of_likes: 3 - number_of_views: 296 + number_of_views: 300 published_date: { seconds: 1596986274 } @@ -33224,45 +33534,45 @@ pr_id_to_video: { authors: "Jiang Wang" authors: "Alan Yuille" authors: "Quoc V. Le" - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } - repositories: { - url: "https://github.com/yaoshiang/MobileNetV2-CIFAR-Cleverhans" - owner: "yaoshiang" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/KoheiNika/repository_new" - owner: "KoheiNika" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } repositories: { url: "https://github.com/tingxueronghua/pytorch-classification-advprop" owner: "tingxueronghua" framework: FRAMEWORK_PYTORCH - number_of_stars: 59 + number_of_stars: 65 } repositories: { is_official: true url: "https://github.com/tensorflow/tpu" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4371 + number_of_stars: 4415 description: "Reference models and tools for Cloud TPUs." 
} repositories: { url: "https://github.com/osmr/imgclsmob" owner: "osmr" framework: FRAMEWORK_OTHERS - number_of_stars: 2233 + number_of_stars: 2268 description: "Sandbox for training deep learning networks" } + repositories: { + url: "https://github.com/KoheiNika/repository_new" + owner: "KoheiNika" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/yaoshiang/MobileNetV2-CIFAR-Cleverhans" + owner: "yaoshiang" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/rwightman/pytorch-image-models" + owner: "rwightman" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12196 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + } methods: { name: "Average Pooling" full_name: "Average Pooling" @@ -33444,7 +33754,7 @@ pr_id_to_video: { url: "https://github.com/revbucket/mister_ed" owner: "revbucket" framework: FRAMEWORK_PYTORCH - number_of_stars: 61 + number_of_stars: 63 description: "Pytorch Adversarial Attack Framework" } } @@ -33452,7 +33762,7 @@ pr_id_to_video: { video_id: "wq8vgC7ltmc" video_title: "PR-268: Adversarial Examples Improve Image Recognition" number_of_likes: 6 - number_of_views: 396 + number_of_views: 399 published_date: { seconds: 1597841881 } @@ -33477,44 +33787,27 @@ pr_id_to_video: { authors: "Xiaoyong Shen" authors: "Jianping Shi" authors: "Jiaya Jia" - repositories: { - url: "https://github.com/GuangyanZhang/SCNN-Deeplabv3-bisenet-icnet" - owner: "GuangyanZhang" - framework: FRAMEWORK_OTHERS - number_of_stars: 14 - } - repositories: { - url: "https://github.com/mattangus/fast-semantic-segmentation" - owner: "mattangus" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 - description: "Forked from https://github.com/oandrienko/fast-semantic-segmentation" - } - repositories: { - url: "https://github.com/victorpham1997/Local_ICNet" - owner: "victorpham1997" - framework: FRAMEWORK_TENSORFLOW - } - repositories: { - url: "https://github.com/lisilin013/ICNet-tensorflow-ros" - owner: "lisilin013" - framework: FRAMEWORK_TENSORFLOW - } repositories: { is_official: true url: "https://github.com/hszhao/ICNet" owner: "hszhao" framework: FRAMEWORK_OTHERS - number_of_stars: 577 + number_of_stars: 578 description: "ICNet for Real-Time Semantic Segmentation on High-Resolution Images, ECCV2018" } repositories: { url: "https://github.com/hellochick/ICNet-tensorflow" owner: "hellochick" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 398 + number_of_stars: 400 description: "TensorFlow-based implementation of \"ICNet for Real-Time Semantic Segmentation on High-Resolution Images\"." 
} + repositories: { + url: "https://github.com/GuangyanZhang/SCNN-Deeplabv3-bisenet-icnet" + owner: "GuangyanZhang" + framework: FRAMEWORK_OTHERS + number_of_stars: 15 + } repositories: { url: "https://github.com/oandrienko/fast-semantic-segmentation" owner: "oandrienko" @@ -33526,9 +33819,26 @@ pr_id_to_video: { url: "https://github.com/osmr/imgclsmob" owner: "osmr" framework: FRAMEWORK_OTHERS - number_of_stars: 2233 + number_of_stars: 2268 description: "Sandbox for training deep learning networks" } + repositories: { + url: "https://github.com/lisilin013/ICNet-tensorflow-ros" + owner: "lisilin013" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/victorpham1997/Local_ICNet" + owner: "victorpham1997" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/mattangus/fast-semantic-segmentation" + owner: "mattangus" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Forked from https://github.com/oandrienko/fast-semantic-segmentation" + } methods: { name: "Softmax" full_name: "Softmax" @@ -33570,7 +33880,7 @@ pr_id_to_video: { url: "https://github.com/feinanshan/FANet" owner: "feinanshan" framework: FRAMEWORK_PYTORCH - number_of_stars: 27 + number_of_stars: 32 } } papers: { @@ -33617,14 +33927,14 @@ pr_id_to_video: { url: "https://github.com/huaifeng1993/DFANet" owner: "huaifeng1993" framework: FRAMEWORK_PYTORCH - number_of_stars: 227 + number_of_stars: 228 description: "reimpliment of DFANet: Deep Feature Aggregation for Real-Time Semantic Segmentation" } repositories: { url: "https://github.com/j-a-lin/DFANet_PyTorch" owner: "j-a-lin" framework: FRAMEWORK_PYTORCH - number_of_stars: 26 + number_of_stars: 27 description: "Unofficial implementation of Deep Feature Aggregation Networks for real-time semantic segmentation." } } @@ -33656,34 +33966,34 @@ pr_id_to_video: { authors: "Gang Yu" authors: "Chunhua Shen" authors: "Nong Sang" - repositories: { - url: "https://github.com/PaddlePaddle/PaddleSeg" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 1763 - description: "End-to-end image segmentation kit based on PaddlePaddle. " - } repositories: { url: "https://github.com/CoinCheung/BiSeNet" owner: "CoinCheung" framework: FRAMEWORK_PYTORCH - number_of_stars: 697 + number_of_stars: 727 description: "Add bisenetv2. My implementation of BiSeNet" } repositories: { url: "https://github.com/MaybeShewill-CV/bisenetv2-tensorflow" owner: "MaybeShewill-CV" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 156 + number_of_stars: 159 description: "Unofficial tensorflow implementation of real-time scene image segmentation model \"BiSeNet V2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation\"" } repositories: { url: "https://github.com/ycszen/BiSeNet" owner: "ycszen" framework: FRAMEWORK_OTHERS - number_of_stars: 124 + number_of_stars: 122 description: "Implementation of BiSeNet and BiSeNetV2" } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleSeg" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2156 + description: "End-to-end image segmentation kit based on PaddlePaddle. 
" + } } papers: { paper_id: "csrnet-cascaded-selective-resolution-network" @@ -33714,7 +34024,7 @@ pr_id_to_video: { url: "https://github.com/AngeLouCN/CFPNet" owner: "AngeLouCN" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 + number_of_stars: 6 description: "A pytorch-based real-time segmentation model for autonomous driving" } methods: { @@ -33761,7 +34071,7 @@ pr_id_to_video: { video_id: "vWz0jgAtYo0" video_title: "PR-269: ICNet for Real-Time Semantic Segmentation on High-Resolution Images" number_of_likes: 11 - number_of_views: 562 + number_of_views: 570 published_date: { seconds: 1597589167 } @@ -33792,40 +34102,40 @@ pr_id_to_video: { authors: "Shumin Han" authors: "Errui Ding" authors: "Shilei Wen" - repositories: { - url: "https://github.com/simplew2011/YOLO_ZOO" - owner: "simplew2011" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - } - repositories: { - url: "https://github.com/wuzhihao7788/yolodet-pytorch" - owner: "wuzhihao7788" - framework: FRAMEWORK_PYTORCH - number_of_stars: 282 - description: "reproduce the YOLO series of papers in pytorch, including YOLOv4, PP-YOLO, YOLOv5,YOLOv3, etc." - } repositories: { url: "https://github.com/PaddlePaddle/PaddleClas" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 2085 + number_of_stars: 2166 description: "A treasure chest for visual recognition powered by PaddlePaddle" } + repositories: { + url: "https://github.com/Sharpiless/pp-yolo-vehcile-detection-and-distance-estimation-for-self-driving" + owner: "Sharpiless" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "a" + } repositories: { is_official: true url: "https://github.com/PaddlePaddle/PaddleDetection" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 4334 + number_of_stars: 4484 description: "Object detection and instance segmentation toolkit based on PaddlePaddle." } repositories: { - url: "https://github.com/Sharpiless/pp-yolo-vehcile-detection-and-distance-estimation-for-self-driving" - owner: "Sharpiless" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "a" + url: "https://github.com/wuzhihao7788/yolodet-pytorch" + owner: "wuzhihao7788" + framework: FRAMEWORK_PYTORCH + number_of_stars: 296 + description: "reproduce the YOLO series of papers in pytorch, including YOLOv4, PP-YOLO, YOLOv5,YOLOv3, etc." + } + repositories: { + url: "https://github.com/simplew2011/YOLO_ZOO" + owner: "simplew2011" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } methods: { name: "ReLU" @@ -33904,7 +34214,7 @@ pr_id_to_video: { url: "https://github.com/PaddlePaddle/PaddleDetection" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 4334 + number_of_stars: 4484 description: "Object detection and instance segmentation toolkit based on PaddlePaddle." } methods: { @@ -33936,115 +34246,119 @@ pr_id_to_video: { authors: "Ross Girshick" authors: "Ali Farhadi" repositories: { - url: "https://github.com/westerndigitalcorporation/YOLOv3-in-PyTorch" - owner: "westerndigitalcorporation" - framework: FRAMEWORK_PYTORCH - number_of_stars: 84 - description: "YOLOv3 in PyTorch with training and inference module implemented." 
- } - repositories: { - url: "https://github.com/AlexeyAB/darknet" - owner: "AlexeyAB" + url: "https://github.com/eric-erki/android-yolo" + owner: "eric-erki" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16682 - description: "YOLOv4 / Scaled-YOLOv4 / YOLO - Neural Networks for Object Detection (Windows and Linux version of Darknet )" + number_of_stars: 2 + description: "Real-time object detection on Android using the YOLO network with TensorFlow" } repositories: { - url: "https://github.com/DevBruce/YOLOv1-TF2" - owner: "DevBruce" + url: "https://github.com/WaelOuni/MergeTenserFlowWithOdb" + owner: "WaelOuni" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1 - description: "YOLOv1 implementation with TensorFlow2" } repositories: { - url: "https://github.com/msuhail1997/YOLO-Pytorch-Object_Detection" - owner: "msuhail1997" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/manankshastri/Object-Detection" + owner: "manankshastri" + framework: FRAMEWORK_TENSORFLOW + description: "Object Detection on a car detection dataset using YOLO" } repositories: { - url: "https://github.com/jalotra/Queue-Detection" - owner: "jalotra" - framework: FRAMEWORK_PYTORCH - description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue{q}, how many people are standing in queue{Q}.\"" + url: "https://github.com/natanielruiz/android-yolo" + owner: "natanielruiz" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 607 + description: "Real-time object detection on Android using the YOLO network with TensorFlow" } repositories: { - url: "https://github.com/jalotra/Queue-Detection-" - owner: "jalotra" - framework: FRAMEWORK_PYTORCH - description: "A naive Algorithm that uses People Detection and Convex Hull as subroutines to solve this problem: \"Given an image of people standing in a queue{q}, how many people are standing in queue{Q}.\"" + url: "https://github.com/Ereebay/Deep-Learning-Documents" + owner: "Ereebay" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Documents for DAI group" } repositories: { - url: "https://github.com/TeamML-2021/knowledge-base" - owner: "TeamML-2021" - framework: FRAMEWORK_OTHERS + url: "https://github.com/noelcodes/YOLO" + owner: "noelcodes" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "Exploring YOLO. Credits to Mark Jay and darkflow." } repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/object_detection/you_only_look_once" - owner: "object_detection" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RobbertBrand/Yolo-Tensorflow-Implementation" + owner: "RobbertBrand" + framework: FRAMEWORK_TENSORFLOW number_of_stars: 2 - description: "ai codebase" + description: "A Yolo object detection implementation in Tensorflow, trainable using Tensorflow optimizers like ADAM. 
" } repositories: { - url: "https://github.com/hamidriasat/Computer-Vision-and-Deep-Learning" - owner: "hamidriasat" + url: "https://github.com/Stick-To/YOLO-TF" + owner: "Stick-To" framework: FRAMEWORK_TENSORFLOW + number_of_stars: 11 + description: "YOLOv2 YOLOv3 in pure tensorflow" } repositories: { - url: "https://github.com/ritesh2448/Text-Detection-And-Recognition" - owner: "ritesh2448" + url: "https://github.com/keshav47/Face-Recognition-And-Verification" + owner: "keshav47" framework: FRAMEWORK_TENSORFLOW + description: "We Recognize face using One Shot Learning and Face Verification is done using Triplet loss function on a pre-trained Inception Model " } - methods: { - name: "Non Maximum Suppression" - full_name: "Non Maximum Suppression" - description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + repositories: { + url: "https://github.com/sprenkle/VectorCards" + owner: "sprenkle" + framework: FRAMEWORK_OTHERS + number_of_stars: 10 } methods: { - name: "Convolution" - full_name: "Convolution" - description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + name: "RoIPool" + full_name: "RoIPool" + description: "**Region of Interest Pooling**, or **RoIPool**, is an operation for extracting a small feature map (e.g., $7×7$) from each RoI in detection and segmentation based tasks. Features are extracted from each candidate box, and thereafter in models like Fast R-CNN, are then classified and bounding box regression performed.\r\n\r\nThe actual scaling to, e.g., $7×7$, occurs by dividing the region proposal into equally sized sections, finding the largest value in each section, and then copying these max values to the output buffer. In essence, **RoIPool** is max pooling on a discrete grid based on a box.\r\n\r\nImage Source: [Joyce Xu](https://towardsdatascience.com/deep-learning-for-object-detection-a-comprehensive-review-73930816d8d9)" } methods: { - name: "Max Pooling" - full_name: "Max Pooling" - description: "**Max Pooling** is a pooling operation that calculates the maximum value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. 
It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs.\r\n\r\nImage Source: [here](https://computersciencewiki.org/index.php/File:MaxpoolSample2.png)" + name: "Step Decay" + full_name: "Step Decay" + description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" } methods: { name: "1x1 Convolution" full_name: "1x1 Convolution" description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } - methods: { - name: "ReLU" - full_name: "Rectified Linear Units" - description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\\left(x\\right) = \\max\\left(0, x\\right) $$" - } methods: { name: "Dropout" full_name: "Dropout" description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } methods: { - name: "Step Decay" - full_name: "Step Decay" - description: "**Step Decay** is a learning rate schedule that drops the learning rate by a factor every few epochs, where the number of epochs is a hyperparameter.\r\n\r\nImage Credit: [Suki Lau](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)" + name: "YOLOv1" + full_name: "YOLOv1" + description: "**YOLOv1** is a single-stage object detection model. Object detection is framed as a regression problem to spatially separated bounding boxes and associated class probabilities. A single neural network predicts bounding boxes and class probabilities directly from full images in one evaluation. Since the whole detection pipeline is a single network, it can be optimized end-to-end directly on detection performance. \r\n\r\nThe network uses features from the entire image to predict each bounding box. It also predicts all bounding boxes across all classes for an image simultaneously. 
This means the network reasons globally about the full image and all the objects in the image." } methods: { - name: "Random Resized Crop" - full_name: "Random Resized Crop" - description: "**RandomResizedCrop** is a type of image data augmentation where a crop of random size of the original size and a random aspect ratio of the original aspect ratio is made. This crop is finally resized to given size.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we we may suffer from sparse gradients, for example training generative adversarial networks." } methods: { - name: "SGD with Momentum" - full_name: "SGD with Momentum" - description: "**SGD with Momentum** is a stochastic optimization method that adds a momentum term to regular stochastic gradient descent:\r\n\r\n$$v\\_{t} = \\gamma{v}\\_{t-1} + \\eta\\nabla\\_{\\theta}J\\left(\\theta\\right)$$\r\n$$\\theta\\_{t} = \\theta\\_{t-1} - v\\_{t} $$\r\n\r\nA typical value for $\\gamma$ is $0.9$. The momentum name comes from an analogy to physics, such as ball accelerating down a slope. In the case of weight updates, we can think of the weights as a particle traveling through parameter space which incurs acceleration from the gradient of the loss.\r\n\r\nImage Source: [Juan Du](https://www.researchgate.net/figure/The-compare-of-the-SGD-algorithms-with-and-without-momentum-Take-Task-1-as-example-The_fig1_333469047)" + name: "VGG" + full_name: "VGG" + description: "**VGG** is a classical convolutional neural network architecture. It was based on an analysis of how to increase the depth of such networks. The network utilises small 3 x 3 filters. Otherwise the network is characterized by its simplicity: the only other components being pooling layers and a fully connected layer.\r\n\r\nImage: [Davi Frossard](https://www.cs.toronto.edu/frossard/post/vgg16/)" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Non Maximum Suppression" + full_name: "Non Maximum Suppression" + description: "**Non Maximum Suppression** is a computer vision method that selects a single entity out of many overlapping entities (for example bounding boxes in object detection). The criteria is usually discarding entities that are below a given probability bound. With remaining entities we repeatedly pick the entity with the highest probability, output that as the prediction, and discard any remaining box where a $\\text{IoU} \\geq 0.5$ with the box output in the previous step.\r\n\r\nImage Credit: [Martin Kersner](https://github.com/martinkersner/non-maximum-suppression-cpp)" } } papers: { @@ -34166,15 +34480,20 @@ pr_id_to_video: { url: "https://github.com/PaddlePaddle/PaddleDetection" owner: "PaddlePaddle" framework: FRAMEWORK_OTHERS - number_of_stars: 4334 + number_of_stars: 4484 description: "Object detection and instance segmentation toolkit based on PaddlePaddle." } + methods: { + name: "PAFNet" + full_name: "Paddle Anchor Free Network" + description: "**PAFNet** is an anchor-free detector for object detection that removes pre-defined anchors and regresses the locations directly, which can achieve higher efficiency. The overall network is composed of a backbone, an up-sampling module, an AGS module, a localization branch and a regression branch. Specifically, ResNet50-vd is chosen as the backbone for server side, and MobileNetV3 for mobile side. Besides, for mobile side, we replace traditional convolution layers with lite convolution operators." + } } video: { video_id: "7v34cCE5H4k" video_title: "PR-270: PP-YOLO: An Effective and Efficient Implementation of Object Detector" - number_of_likes: 41 - number_of_views: 1949 + number_of_likes: 42 + number_of_views: 2007 published_date: { seconds: 1598192789 } @@ -34199,42 +34518,18 @@ pr_id_to_video: { authors: "Yunming Ye" authors: "Zhenguo Li" authors: "Xiuqiang He" - repositories: { - url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/rank/deepfm" - owner: "rank" - framework: FRAMEWORK_OTHERS - number_of_stars: 556 - description: "大规模推荐模型训练工具" - } - repositories: { - url: "https://github.com/NVIDIA/HugeCTR" - owner: "NVIDIA" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 444 - description: "HugeCTR is a high efficiency GPU framework designed for Click-Through-Rate (CTR) estimating training" - } - repositories: { - url: "https://github.com/codlife/NLP" - owner: "codlife" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/sandeepnair2812/Deep-Learning-Based-Search-and-Recommendation-System" - owner: "sandeepnair2812" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/shenweichen/DeepCTR" owner: "shenweichen" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5130 + number_of_stars: 5247 description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." } repositories: { url: "https://github.com/Leavingseason/OpenLearning4DeepRecsys" owner: "Leavingseason" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 385 + number_of_stars: 389 description: "Some deep learning based recsys for open learning." 
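
The `Non Maximum Suppression` entry added earlier in this hunk describes the greedy loop: keep the highest-scoring box, discard overlaps with $\text{IoU} \geq 0.5$, repeat. A hedged NumPy sketch of that loop (my illustration, not from the database; `nms` and the `[x1, y1, x2, y2]` box layout are assumptions):

```python
import numpy as np

def nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float = 0.5) -> list:
    """Greedy NMS over boxes of shape (N, 4) laid out as [x1, y1, x2, y2]."""
    x1, y1, x2, y2 = boxes.T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]          # indices by descending confidence
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))                 # highest-scoring survivor is a prediction
        # intersection of box i with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou < iou_thresh]  # discard boxes overlapping the keeper
    return keep
```
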
} repositories: { @@ -34248,23 +34543,51 @@ pr_id_to_video: { url: "https://github.com/GitHub-HongweiZhang/prediction-flow" owner: "GitHub-HongweiZhang" framework: FRAMEWORK_PYTORCH - number_of_stars: 167 + number_of_stars: 173 description: "Deep-Learning based CTR models implemented by PyTorch" } repositories: { url: "https://github.com/shenweichen/DeepCTR-PyTorch" owner: "shenweichen" framework: FRAMEWORK_PYTORCH - number_of_stars: 1329 + number_of_stars: 1397 description: "【PyTorch】Easy-to-use,Modular and Extendible package of deep-learning based CTR models." } repositories: { url: "https://github.com/JianzhouZhan/Awesome-RecSystem-Models" owner: "JianzhouZhan" framework: FRAMEWORK_PYTORCH - number_of_stars: 209 + number_of_stars: 210 description: "Implements of Awesome RecSystem Models with PyTorch/TF2.0" } + repositories: { + url: "https://github.com/meabhishekkumar/iith_session_2020" + owner: "meabhishekkumar" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "material shared in IIT Hyderabad " + } + repositories: { + url: "https://github.com/objectc/DeepFM" + owner: "objectc" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 + description: "An TensorFlow 2 and Keras implementation of DeepFM" + } + repositories: { + url: "https://github.com/massquantity/LibRecommender" + owner: "massquantity" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 110 + description: "Versatile End-to-End Recommender System" + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/rank/deepfm" + owner: "rank" + framework: FRAMEWORK_OTHERS + number_of_stars: 594 + description: "大规模推荐模型训练工具" + } } papers: { paper_id: "deepfm-an-end-to-end-wide-deep-learning" @@ -34284,21 +34607,21 @@ pr_id_to_video: { url: "https://github.com/shenweichen/DeepCTR" owner: "shenweichen" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5130 + number_of_stars: 5247 description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." } repositories: { url: "https://github.com/shenweichen/DeepCTR-PyTorch" owner: "shenweichen" framework: FRAMEWORK_PYTORCH - number_of_stars: 1329 + number_of_stars: 1397 description: "【PyTorch】Easy-to-use,Modular and Extendible package of deep-learning based CTR models." } repositories: { url: "https://github.com/DataCanvasIO/DeepTables" owner: "DataCanvasIO" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 610 + number_of_stars: 775 description: "DeepTables: Deep-learning Toolkit for Tabular data" } } @@ -34311,12 +34634,17 @@ pr_id_to_video: { seconds: 1599955200 } authors: "Harshit Pande" + repositories: { + url: "https://github.com/thinkall/deepfefm" + owner: "thinkall" + framework: FRAMEWORK_OTHERS + } repositories: { is_official: true url: "https://github.com/shenweichen/DeepCTR" owner: "shenweichen" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5130 + number_of_stars: 5247 description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." 
} } @@ -34336,7 +34664,7 @@ pr_id_to_video: { url: "https://github.com/PaddlePaddle/PaddleRec/tree/release/2.1.0/models/rank/gateDnn" owner: "rank" framework: FRAMEWORK_OTHERS - number_of_stars: 556 + number_of_stars: 594 description: "大规模推荐模型训练工具" } } @@ -34354,26 +34682,26 @@ pr_id_to_video: { authors: "Xiaoyu Zhang" authors: "Liang Wang" repositories: { - is_official: true - url: "https://github.com/CRIPAC-DIG/Fi_GNN" - owner: "CRIPAC-DIG" + url: "https://github.com/JiangShaoYin/FIGNN" + owner: "JiangShaoYin" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 51 - description: "[CIKM 2019] Code and dataset for \"Fi-GNN: Modeling Feature Interactions via Graph Neural Networks for CTR Prediction\"" + number_of_stars: 13 + description: "利用图神经网络进行CTR预估" } repositories: { url: "https://github.com/CRIPAC-DIG/Fi_GNNs" owner: "CRIPAC-DIG" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 51 + number_of_stars: 55 description: "[CIKM 2019] Code and dataset for \"Fi-GNN: Modeling Feature Interactions via Graph Neural Networks for CTR Prediction\"" } repositories: { - url: "https://github.com/JiangShaoYin/FIGNN" - owner: "JiangShaoYin" + is_official: true + url: "https://github.com/CRIPAC-DIG/Fi_GNN" + owner: "CRIPAC-DIG" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 12 - description: "利用图神经网络进行CTR预估" + number_of_stars: 55 + description: "[CIKM 2019] Code and dataset for \"Fi-GNN: Modeling Feature Interactions via Graph Neural Networks for CTR Prediction\"" } } papers: { @@ -34391,18 +34719,18 @@ pr_id_to_video: { authors: "Aaron Flores" authors: "Guang Lin" repositories: { - is_official: true - url: "https://github.com/WayneDW/sDeepFwFM" + url: "https://github.com/WayneDW/DeepLight_Deep-Lightweight-Feature-Interactions" owner: "WayneDW" framework: FRAMEWORK_PYTORCH - number_of_stars: 68 + number_of_stars: 71 description: "Accelerating Inference for Recommendation Systems (WSDM'21)" } repositories: { - url: "https://github.com/WayneDW/DeepLight_Deep-Lightweight-Feature-Interactions" + is_official: true + url: "https://github.com/WayneDW/sDeepFwFM" owner: "WayneDW" framework: FRAMEWORK_PYTORCH - number_of_stars: 68 + number_of_stars: 71 description: "Accelerating Inference for Recommendation Systems (WSDM'21)" } } @@ -34483,21 +34811,21 @@ pr_id_to_video: { url: "https://github.com/shenweichen/DeepCTR" owner: "shenweichen" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5130 + number_of_stars: 5247 description: "Easy-to-use,Modular and Extendible package of deep-learning based CTR models ." } repositories: { url: "https://github.com/shenweichen/DeepCTR-PyTorch" owner: "shenweichen" framework: FRAMEWORK_PYTORCH - number_of_stars: 1329 + number_of_stars: 1397 description: "【PyTorch】Easy-to-use,Modular and Extendible package of deep-learning based CTR models." 
} repositories: { url: "https://github.com/DataCanvasIO/DeepTables" owner: "DataCanvasIO" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 610 + number_of_stars: 775 description: "DeepTables: Deep-learning Toolkit for Tabular data" } } @@ -34520,14 +34848,14 @@ pr_id_to_video: { url: "https://github.com/rener1199/deep_memory" owner: "rener1199" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 57 + number_of_stars: 58 } } video: { video_id: "zxXRGhSQ1f4" video_title: "PR-271: DeepFM: A Factorization-Machine based Neural Network for CTR Prediction" - number_of_likes: 12 - number_of_views: 1104 + number_of_likes: 14 + number_of_views: 1205 published_date: { seconds: 1598797388 } @@ -34604,7 +34932,7 @@ pr_id_to_video: { url: "https://github.com/jdcomsearch/poeem" owner: "jdcomsearch" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 14 + number_of_stars: 22 description: "A library for end-to-end learning of embedding index and retrieval model" } } @@ -34639,9 +34967,14 @@ pr_id_to_video: { url: "https://github.com/studio-ousia/bpr" owner: "studio-ousia" framework: FRAMEWORK_PYTORCH - number_of_stars: 67 + number_of_stars: 78 description: "Binary Passage Retriever (BPR) - an efficient passage retriever for open-domain question answering" } + methods: { + name: "RUN" + full_name: "Rung Kutta optimization" + description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers. Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely well-known in mathematics. The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. 
The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization" + } } papers: { paper_id: "fixed-length-protein-embeddings-using" @@ -34659,7 +34992,7 @@ pr_id_to_video: { url: "https://github.com/googleinterns/protein-embedding-retrieval" owner: "googleinterns" framework: FRAMEWORK_OTHERS - number_of_stars: 27 + number_of_stars: 29 } } papers: { @@ -34691,7 +35024,7 @@ pr_id_to_video: { url: "https://github.com/tensorflow/neural-structured-learning" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 835 + number_of_stars: 846 description: "Training neural models with structured signals." } } @@ -34712,7 +35045,7 @@ pr_id_to_video: { url: "https://github.com/google-research/tapas" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 674 + number_of_stars: 689 description: "End-to-end neural table-text understanding models." } methods: { @@ -34832,7 +35165,7 @@ pr_id_to_video: { video_id: "cU46yR-A0cs" video_title: "PR-272: Accelerating Large-Scale Inference with Anisotropic Vector Quantization" number_of_likes: 7 - number_of_views: 489 + number_of_views: 507 published_date: { seconds: 1598865302 } @@ -34863,25 +35196,11 @@ pr_id_to_video: { authors: "Oleksii Kuchaiev" authors: "Ganesh Venkatesh" authors: "Hao Wu" - repositories: { - url: "https://github.com/open-mmlab/mmpose" - owner: "open-mmlab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 982 - description: "OpenMMLab Pose Estimation Toolbox and Benchmark." - } - repositories: { - url: "https://github.com/deepmind/jmp" - owner: "deepmind" - framework: FRAMEWORK_OTHERS - number_of_stars: 31 - description: "JMP is a Mixed Precision library for JAX." - } repositories: { url: "https://github.com/NVIDIA/DeepRecommender" owner: "NVIDIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 1582 + number_of_stars: 1580 description: "Deep learning for recommender systems" } repositories: { @@ -34889,7 +35208,7 @@ pr_id_to_video: { url: "https://github.com/baidu-research/DeepBench" owner: "baidu-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 935 + number_of_stars: 937 description: "Benchmarking Deep Learning operations on different hardware" } repositories: { @@ -34899,6 +35218,20 @@ pr_id_to_video: { number_of_stars: 4 description: "Quantized (half-precision) CNNs via Efficient Neural Architecture Search (ENAS) " } + repositories: { + url: "https://github.com/open-mmlab/mmpose" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1081 + description: "OpenMMLab Pose Estimation Toolbox and Benchmark." + } + repositories: { + url: "https://github.com/deepmind/jmp" + owner: "deepmind" + framework: FRAMEWORK_OTHERS + number_of_stars: 33 + description: "JMP is a Mixed Precision library for JAX." 
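
This PR-273 section covers Mixed Precision Training, whose central trick is loss scaling: multiply the loss before backprop so small fp16 gradients do not underflow, then unscale before the optimizer step. A hedged sketch using PyTorch's `torch.cuda.amp` (my example, not drawn from the listed repositories; the tiny `nn.Linear` model and random data are stand-ins):

```python
import torch
import torch.nn as nn
from torch.cuda.amp import autocast, GradScaler

device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Linear(16, 4).to(device)                  # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = GradScaler(enabled=(device == "cuda"))      # loss scaling only matters for fp16

for _ in range(10):
    x = torch.randn(32, 16, device=device)
    y = torch.randint(0, 4, (32,), device=device)
    optimizer.zero_grad()
    with autocast(enabled=(device == "cuda")):       # run the forward pass in fp16 where safe
        loss = nn.functional.cross_entropy(model(x), y)
    scaler.scale(loss).backward()                    # scale up so fp16 gradients don't underflow
    scaler.step(optimizer)                           # unscales first; skips the step on inf/nan
    scaler.update()                                  # adapt the scale factor over time
```
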
+ } methods: { name: "Convolution" full_name: "Convolution" @@ -35074,7 +35407,7 @@ pr_id_to_video: { url: "https://github.com/NVIDIA/OpenSeq2Seq" owner: "NVIDIA" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1408 + number_of_stars: 1410 description: "Toolkit for efficient experimentation with Speech Recognition, Text2Speech and NLP" } repositories: { @@ -35175,7 +35508,7 @@ pr_id_to_video: { video_id: "VEG7qjSa3kY" video_title: "PR-273: Mixed Precision Training" number_of_likes: 19 - number_of_views: 629 + number_of_views: 643 published_date: { seconds: 1599399041 } @@ -35201,18 +35534,18 @@ pr_id_to_video: { authors: "Sylvain Gelly" authors: "Mario Lucic" repositories: { - is_official: true - url: "https://github.com/google-research/google-research" - owner: "google-research" + url: "https://github.com/google-research/google-research/tree/master/mutual_information_representation_learning" + owner: "master" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 + number_of_stars: 18789 description: "Google Research" } repositories: { - url: "https://github.com/google-research/google-research/tree/master/mutual_information_representation_learning" - owner: "master" + is_official: true + url: "https://github.com/google-research/google-research" + owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 + number_of_stars: 18790 description: "Google Research" } } @@ -35231,18 +35564,6 @@ pr_id_to_video: { authors: "Phil Bachman" authors: "Adam Trischler" authors: "Yoshua Bengio" - repositories: { - url: "https://github.com/jqhoogland/rgpy" - owner: "jqhoogland" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Renormalization Group techniques implemented in python with special emphasis on Machine Learning-inspired methods." - } - repositories: { - url: "https://github.com/jtlai0921/infomax" - owner: "jtlai0921" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/HolenYHR/Deepinfo_pytorch" owner: "HolenYHR" @@ -35257,19 +35578,26 @@ pr_id_to_video: { number_of_stars: 118 description: "extract features by maximizing mutual information" } + repositories: { + url: "https://github.com/jqhoogland/rgpy" + owner: "jqhoogland" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Renormalization Group techniques implemented in python with special emphasis on Machine Learning-inspired methods." 
+ } repositories: { is_official: true url: "https://github.com/rdevon/DIM" owner: "rdevon" framework: FRAMEWORK_PYTORCH - number_of_stars: 654 + number_of_stars: 662 description: "Deep InfoMax (DIM), or \"Learning Deep Representations by Mutual Information Estimation and Maximization\"" } repositories: { url: "https://github.com/DuaneNielsen/DeepInfomaxPytorch" owner: "DuaneNielsen" framework: FRAMEWORK_PYTORCH - number_of_stars: 237 + number_of_stars: 239 description: "Learning deep representations by mutual information estimation and maximization" } repositories: { @@ -35292,6 +35620,11 @@ pr_id_to_video: { number_of_stars: 13 description: "Code for the paper: Learning Adversarially Robust Representations via Worst-Case Mutual Information Maximization (https://arxiv.org/abs/2002.11798)" } + repositories: { + url: "https://github.com/jtlai0921/infomax" + owner: "jtlai0921" + framework: FRAMEWORK_TENSORFLOW + } } papers: { paper_id: "a-mutual-information-maximization-perspective-1" @@ -35377,9 +35710,14 @@ pr_id_to_video: { url: "https://github.com/zpeng27/GMI" owner: "zpeng27" framework: FRAMEWORK_PYTORCH - number_of_stars: 68 + number_of_stars: 71 description: "Graph Representation Learning via Graphical Mutual Information Maximization" } + methods: { + name: "GMI" + full_name: "Graphic Mutual Information" + description: "**Graphic Mutual Information**, or **GMI**, measures the correlation between input graphs and high-level hidden representations. GMI generalizes the idea of conventional mutual information computations from vector space to the graph domain where measuring mutual information from two aspects of node features and topological structure is indispensable. GMI exhibits several benefits: First, it is invariant to the isomorphic transformation of input graphs---an inevitable constraint in many existing graph representation learning algorithms; Besides, it can be efficiently estimated and maximized by current mutual information estimation methods such as MINE." 
+ } } papers: { paper_id: "infograph-unsupervised-and-semi-supervised" @@ -35393,21 +35731,21 @@ pr_id_to_video: { authors: "Jordan Hoffmann" authors: "Vikas Verma" authors: "Jian Tang" - repositories: { - url: "https://github.com/hengruizhang98/InfoGraph" - owner: "hengruizhang98" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "DGL Implementation of InfoGraph model (ICLR 2020)" - } repositories: { is_official: true url: "https://github.com/fanyun-sun/InfoGraph" owner: "fanyun-sun" framework: FRAMEWORK_PYTORCH - number_of_stars: 139 + number_of_stars: 145 description: "Official code for \"InfoGraph: Unsupervised and Semi-supervised Graph-Level Representation Learning via Mutual Information Maximization\" (ICLR 2020, spotlight)" } + repositories: { + url: "https://github.com/hengruizhang98/InfoGraph" + owner: "hengruizhang98" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "DGL Implementation of InfoGraph model (ICLR 2020)" + } } papers: { paper_id: "unsupervised-hierarchical-graph-1" @@ -35466,65 +35804,71 @@ pr_id_to_video: { authors: "Ilya Sutskever" authors: "Pieter Abbeel" repositories: { - url: "https://github.com/yashgarg98/GAN" - owner: "yashgarg98" - framework: FRAMEWORK_OTHERS - description: "Some implementations of Generative Adversarial Networks.(DCGAN, InfoGAN)" + url: "https://github.com/SeonbeomKim/TensorFlow-InfoGAN" + owner: "SeonbeomKim" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3 + description: "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets" } repositories: { - url: "https://github.com/chandragupta0001/GAN/tree/master/info_gan" - owner: "master" - framework: FRAMEWORK_OTHERS + url: "https://github.com/sidneyp/bidirectional" + owner: "sidneyp" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 + description: "Complete project for paper \"Bidirectional Learning for Robust Neural Networks\"" } repositories: { - url: "https://github.com/elingaard/infogan-mnist" - owner: "elingaard" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "PyTorch implementation of InfoGAN" + url: "https://github.com/Murali81/InfoGAN" + owner: "Murali81" + framework: FRAMEWORK_OTHERS + description: "A demo script explaining InfoGAN on MNIST Dataset" } repositories: { - url: "https://github.com/amiryanj/socialways" - owner: "amiryanj" - framework: FRAMEWORK_PYTORCH - number_of_stars: 87 - description: "Social Ways: Learning Multi-Modal Distributions of Pedestrian Trajectories with GANs (CVPR 2019)" + url: "https://github.com/jonasz/progressive_infogan" + owner: "jonasz" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 183 + description: "Progressive training of GANs with Mutual Information Penalty" } repositories: { - url: "https://github.com/Neptune-Trojans/GANs" - owner: "Neptune-Trojans" + url: "https://github.com/VitoRazor/Gan_Architecture" + owner: "VitoRazor" framework: FRAMEWORK_TENSORFLOW - description: "Implementation of different GANs architectures" + number_of_stars: 1 } repositories: { - url: "https://github.com/zcemycl/Matlab-GAN" - owner: "zcemycl" - framework: FRAMEWORK_PYTORCH - number_of_stars: 78 - description: "MATLAB implementations of Generative Adversarial Networks -- from GAN to Pixel2Pixel, CycleGAN" + url: "https://github.com/jeanjerome/semisupervised_timeseries_infogan" + owner: "jeanjerome" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 15 + description: "A tensorflow implementation of informative generative adversarial network (InfoGAN ) to 
one dimensional ( 1D ) time series data with a supervised loss function. So it's called semisupervised Info GAN." } repositories: { - url: "https://github.com/Evavanrooijen/InfoGAN-PyTorch" - owner: "Evavanrooijen" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/openai/InfoGAN" + owner: "openai" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 965 + description: "Code for reproducing key results in the paper \"InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets\"" } repositories: { - url: "https://github.com/inkplatform/InfoGAN-PyTorch" - owner: "inkplatform" - framework: FRAMEWORK_PYTORCH - description: "code for InfoGAN" + url: "https://github.com/landeros10/infoganJL" + owner: "landeros10" + framework: FRAMEWORK_OTHERS + description: "InfoGAN implementation in Julia with flexible nn architectures for generator and descriminator" } repositories: { - url: "https://github.com/vinoth654321/Casia-Webface" - owner: "vinoth654321" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + url: "https://github.com/buriburisuri/timeseries_gan" + owner: "buriburisuri" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 267 + description: "A tensorflow implementation of GAN ( exactly InfoGAN or Info GAN ) to one dimensional ( 1D ) time series data." } repositories: { - url: "https://github.com/bacdavid/InfomaxVAE" - owner: "bacdavid" - framework: FRAMEWORK_OTHERS - description: "Obtain the latent variables that contain the maximal mutual information." + url: "https://github.com/Natsu6767/InfoGAN-PyTorch" + owner: "Natsu6767" + framework: FRAMEWORK_PYTORCH + number_of_stars: 221 + description: "PyTorch Implementation of InfoGAN" } methods: { name: "Batch Normalization" @@ -35595,12 +35939,20 @@ pr_id_to_video: { authors: "Xi Peng" authors: "Dimitris Metaxas" authors: "Ting Liu" + repositories: { + is_official: true + url: "https://github.com/google-research/google-research" + owner: "google-research" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 18790 + description: "Google Research" + } } video: { video_id: "yFIMPxdQTe0" video_title: "PR-274: On mutual information maximization for representation learning" - number_of_likes: 17 - number_of_views: 892 + number_of_likes: 19 + number_of_views: 980 published_date: { seconds: 1600609781 } @@ -35733,7 +36085,7 @@ pr_id_to_video: { video_id: "Itt92ztDfMo" video_title: "PR-275: On Robustness and Transferability of Convolutional Neural Networks" number_of_likes: 9 - number_of_views: 659 + number_of_views: 672 published_date: { seconds: 1600613898 } @@ -35762,6 +36114,13 @@ pr_id_to_video: { authors: "Giacomo Tarroni" authors: "Wenjia Bai" authors: "Daniel Rueckert" + repositories: { + url: "https://github.com/cherise215/advchain" + owner: "cherise215" + framework: FRAMEWORK_PYTORCH + number_of_stars: 13 + description: "Adversarial Data Augmentation with Chained Transformations (Adv Chain)" + } } papers: { paper_id: "medical-image-synthesis-for-data-augmentation" @@ -35797,7 +36156,7 @@ pr_id_to_video: { url: "https://github.com/xxxliu95/RA_FA_Cardiac" owner: "xxxliu95" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 + number_of_stars: 4 } } papers: { @@ -35902,7 +36261,7 @@ pr_id_to_video: { video_id: "bHZD2sXbSm8" video_title: "PR-276: Realistic Adversarial Data Augmentation for MR Image Segmentation" number_of_likes: 3 - number_of_views: 317 + number_of_views: 327 published_date: { seconds: 1601282428 } @@ -35925,60 +36284,60 @@ pr_id_to_video: { authors: "Dan Hendrycks" authors: "Thomas 
Dietterich" repositories: { - url: "https://github.com/edadaltocg/datasets" - owner: "edadaltocg" + url: "https://github.com/automl/nes" + owner: "automl" framework: FRAMEWORK_PYTORCH - description: "Download scripts to open datasets." + number_of_stars: 19 + description: "Neural Ensemble Search for Performant and Calibrated Predictions" } repositories: { - url: "https://github.com/allenai/robustnav" - owner: "allenai" + url: "https://github.com/yueatsprograms/ttt_imagenet_release" + owner: "yueatsprograms" framework: FRAMEWORK_PYTORCH - number_of_stars: 13 - description: "Evaluating pre-trained navigation agents under corruptions" + number_of_stars: 68 + description: "TTT Code Release" } repositories: { - url: "https://github.com/EPFL-VILAB/XDEnsembles" - owner: "EPFL-VILAB" + url: "https://github.com/yueatsprograms/ttt_cifar_release" + owner: "yueatsprograms" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 - description: "Robustness via Cross-Domain Ensembles" + number_of_stars: 52 + description: "TTT Code Release" } repositories: { - url: "https://github.com/facebookresearch/augmentation-corruption" - owner: "facebookresearch" + url: "https://github.com/EPFL-VILAB/XDEnsembles" + owner: "EPFL-VILAB" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." + number_of_stars: 17 + description: "Robustness via Cross-Domain Ensembles" } repositories: { is_official: true url: "https://github.com/hendrycks/robustness" owner: "hendrycks" framework: FRAMEWORK_PYTORCH - number_of_stars: 526 + number_of_stars: 544 description: "Corruption and Perturbation Robustness (ICLR 2019)" } repositories: { - url: "https://github.com/yueatsprograms/ttt_cifar_release" - owner: "yueatsprograms" + url: "https://github.com/facebookresearch/augmentation-corruption" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 47 - description: "TTT Code Release" + number_of_stars: 13 + description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." } repositories: { - url: "https://github.com/yueatsprograms/ttt_imagenet_release" - owner: "yueatsprograms" + url: "https://github.com/allenai/robustnav" + owner: "allenai" framework: FRAMEWORK_PYTORCH - number_of_stars: 64 - description: "TTT Code Release" + number_of_stars: 14 + description: "Evaluating pre-trained navigation agents under corruptions" } repositories: { - url: "https://github.com/automl/nes" - owner: "automl" + url: "https://github.com/edadaltocg/datasets" + owner: "edadaltocg" framework: FRAMEWORK_PYTORCH - number_of_stars: 18 - description: "Neural Ensemble Search for Performant and Calibrated Predictions" + description: "Download scripts to open datasets." } methods: { name: "ResNet" @@ -36045,7 +36404,7 @@ pr_id_to_video: { url: "https://github.com/bethgelab/imagecorruptions" owner: "bethgelab" framework: FRAMEWORK_OTHERS - number_of_stars: 182 + number_of_stars: 190 description: "Python package to corrupt arbitrary images." 
} methods: { @@ -36212,27 +36571,27 @@ pr_id_to_video: { authors: "Matthias Bethge" authors: "Wieland Brendel" repositories: { - url: "https://github.com/facebookresearch/augmentation-corruption" - owner: "facebookresearch" + is_official: true + url: "https://github.com/hendrycks/robustness" + owner: "hendrycks" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." + number_of_stars: 544 + description: "Corruption and Perturbation Robustness (ICLR 2019)" } repositories: { is_official: true url: "https://github.com/bethgelab/game-of-noise" owner: "bethgelab" framework: FRAMEWORK_PYTORCH - number_of_stars: 48 + number_of_stars: 50 description: "Trained model weights, training and evaluation code from the paper \"Increasing the robustness of DNNs against image corruptions by playing the Game of Noise\" " } repositories: { - is_official: true - url: "https://github.com/hendrycks/robustness" - owner: "hendrycks" + url: "https://github.com/facebookresearch/augmentation-corruption" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 526 - description: "Corruption and Perturbation Robustness (ICLR 2019)" + number_of_stars: 13 + description: "This repository provides code for \"On Interaction Between Augmentations and Corruptions in Natural Corruption Robustness\"." } } papers: { @@ -36251,7 +36610,7 @@ pr_id_to_video: { video_id: "EE4BxrAbNM8" video_title: "PR-277: Benchmarking Neural Network Robustness to Common Corruptions and Perturbations" number_of_likes: 4 - number_of_views: 509 + number_of_views: 536 published_date: { seconds: 1601216630 } @@ -36278,7 +36637,7 @@ pr_id_to_video: { url: "https://github.com/princeton-vl/RAFT" owner: "princeton-vl" framework: FRAMEWORK_PYTORCH - number_of_stars: 1375 + number_of_stars: 1433 } } papers: { @@ -36311,8 +36670,8 @@ pr_id_to_video: { url: "https://github.com/weiyithu/PV-RAFT" owner: "weiyithu" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "Code for \"PV-RAFT: Point-Voxel Correlation Fields for Scene Flow Estimation of Point Clouds\", CVPR 2021" + number_of_stars: 24 + description: "[CVPR 2021] PV-RAFT: Point-Voxel Correlation Fields for Scene Flow Estimation of Point Clouds" } } papers: { @@ -36346,7 +36705,7 @@ pr_id_to_video: { url: "https://github.com/zacjiang/scv" owner: "zacjiang" framework: FRAMEWORK_PYTORCH - number_of_stars: 83 + number_of_stars: 102 description: "Learning Optical Flow from a Few Matches (CVPR 2021)" } } @@ -36379,6 +36738,13 @@ pr_id_to_video: { authors: "Lena Maier-Hein" authors: "Carsten Rother" authors: "Ullrich Köthe" + repositories: { + url: "https://github.com/VLL-HD/analyzing_inverse_problems" + owner: "VLL-HD" + framework: FRAMEWORK_PYTORCH + number_of_stars: 35 + description: "Code for the paper \"Analyzing inverse problems with invertible neural networks.\" (2018)" + } repositories: { url: "https://github.com/jaekookang/invertible_neural_networks" owner: "jaekookang" @@ -36386,13 +36752,6 @@ pr_id_to_video: { number_of_stars: 17 description: "Normalizing-flow Invertible Neural Networks (TensorFlow2+Keras)" } - repositories: { - url: "https://github.com/VLL-HD/analyzing_inverse_problems" - owner: "VLL-HD" - framework: FRAMEWORK_PYTORCH - number_of_stars: 32 - description: "Code for the paper \"Analyzing inverse problems with invertible neural networks.\" (2018)" - } } papers: { paper_id: "raft-3d-scene-flow-using-rigid-motion" @@ 
-36409,14 +36768,14 @@ pr_id_to_video: { url: "https://github.com/princeton-vl/RAFT-3D" owner: "princeton-vl" framework: FRAMEWORK_PYTORCH - number_of_stars: 71 + number_of_stars: 80 } } video: { video_id: "OnZIDatotZ4" video_title: "PR-278: RAFT: Recurrent All-Pairs Field Transforms for Optical Flow" - number_of_likes: 11 - number_of_views: 1163 + number_of_likes: 16 + number_of_views: 1270 published_date: { seconds: 1602487022 } @@ -36446,7 +36805,7 @@ pr_id_to_video: { url: "https://github.com/clinicalml/mimic_annotations" owner: "clinicalml" framework: FRAMEWORK_OTHERS - number_of_stars: 1 + number_of_stars: 2 } } papers: { @@ -36499,7 +36858,7 @@ pr_id_to_video: { url: "https://github.com/spiritdjy/MixPaper" owner: "spiritdjy" framework: FRAMEWORK_OTHERS - number_of_stars: 13 + number_of_stars: 14 description: "Paper reading" } } @@ -36531,7 +36890,7 @@ pr_id_to_video: { url: "https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/1.4.Biomedical_NER_SparkNLP_paper_reproduce.ipynb" owner: "Healthcare" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 523 + number_of_stars: 546 description: "Public runnable examples of using John Snow Labs' NLP for Apache Spark." } methods: { @@ -36636,7 +36995,7 @@ pr_id_to_video: { video: { video_id: "49JPfaYWZ3M" video_title: "PR-279: Robust Benchmarking for Machine Learning of Clinical Entity Extraction" - number_of_views: 241 + number_of_views: 243 published_date: { seconds: 1603030069 } @@ -36661,68 +37020,66 @@ pr_id_to_video: { authors: "Fanyi Xiao" authors: "Yong Jae Lee" repositories: { - url: "https://github.com/Abhijeet8901/Instance-Segmentation-using-YOLACT" - owner: "Abhijeet8901" + url: "https://github.com/YeshengSu/Yolact" + owner: "YeshengSu" framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "Instance Segmentation Using YOLACT" + description: "real-time instance segmentation" } repositories: { - url: "https://github.com/Ikomia-dev/Yolact" - owner: "Ikomia-dev" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Reuse & reproduce YOLACT in Ikomia Studio thanks to this plugin wrapping original code." + url: "https://github.com/BigThreeMI/Utils" + owner: "BigThreeMI" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/adityarc19/traffic-vehicles-instance-segmentation" - owner: "adityarc19" + url: "https://github.com/zhhchen4njit/yolact" + owner: "zhhchen4njit" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "This is a real time instance segmentation task implemented with YOLACT++ and DCNv2 on Google Colab. " } repositories: { - url: "https://github.com/Jittor/InstanceSegmentation-jittor" - owner: "Jittor" + url: "https://github.com/lucasfporto/yolactTest" + owner: "lucasfporto" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 } repositories: { - url: "https://github.com/hololee/YOLACT" - owner: "hololee" - framework: FRAMEWORK_PYTORCH - description: "Implement YOLACT paper and apply to the custom dataset." + url: "https://github.com/IntelligenceDatum/ICCV2019_Model_Compression" + owner: "IntelligenceDatum" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 } repositories: { - url: "https://github.com/anshkumar/yolact" - owner: "anshkumar" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18 - description: "Tensorflow 2.3 implementation YOLACT" + url: "https://github.com/Ma-Dan/Yolact-CoreML" + owner: "Ma-Dan" + framework: FRAMEWORK_PYTORCH + number_of_stars: 25 + description: "Yolact for iOS implemented using CoreML."
} repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" + url: "https://github.com/divyachandana/yolact" + owner: "divyachandana" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" } repositories: { - url: "https://github.com/zhawhjw/yolact-interpret" - owner: "zhawhjw" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/leohsuofnthu/Tensorflow-YOLACT" + owner: "leohsuofnthu" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 79 + description: "Implementation of the paper \"YOLACT Real-time Instance Segmentation\" in Tensorflow 2" } repositories: { - url: "https://github.com/artneer/yolact" - owner: "artneer" + url: "https://github.com/youngwanLEE/CenterMask" + owner: "youngwanLEE" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "A simple, fully convolutional model for real-time instance segmentation." + number_of_stars: 680 + description: "CenterMask : Real-Time Anchor-Free Instance Segmentation, in CVPR 2020" } repositories: { - url: "https://github.com/hz-ants/yolact" - owner: "hz-ants" + is_official: true + url: "https://github.com/dbolya/yolact" + owner: "dbolya" framework: FRAMEWORK_PYTORCH + number_of_stars: 3868 + description: "A simple, fully convolutional model for real-time instance segmentation." } methods: { name: "Global Average Pooling" @@ -36788,62 +37145,70 @@ pr_id_to_video: { authors: "Fanyi Xiao" authors: "Yong Jae Lee" repositories: { - url: "https://github.com/anshkumar/yolact" - owner: "anshkumar" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18 - description: "Tensorflow 2.3 implementation YOLACT" + url: "https://github.com/YeshengSu/Yolact" + owner: "YeshengSu" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "real-time instance segmentation" } repositories: { - url: "https://github.com/lucasfporto/yolactTest" - owner: "lucasfporto" + url: "https://github.com/divyachandana/yolact" + owner: "divyachandana" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/Jittor/InstanceSegmentation-jittor" - owner: "Jittor" + is_official: true + url: "https://github.com/dbolya/yolact" + owner: "dbolya" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + number_of_stars: 3868 + description: "A simple, fully convolutional model for real-time instance segmentation." } repositories: { - url: "https://github.com/adityarc19/traffic-vehicles-instance-segmentation" - owner: "adityarc19" + url: "https://github.com/jiajunhua/dbolya-yolact" + owner: "jiajunhua" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "This is a real time instance segmentation task implemented with YOLACT++ and DCNv2 on Google Colab. " + number_of_stars: 2 } repositories: { - url: "https://github.com/Ikomia-dev/Yolact" - owner: "Ikomia-dev" + url: "https://github.com/KevinJia1212/yolact_cityscapes_550" + owner: "KevinJia1212" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Reuse & reproduce YOLACT in Ikomia Studio thanks to this plugin wrapping original code." + number_of_stars: 11 + description: "Modified real-time instance segmentation network YOLACT for several object categories on CityScapes dataset " } repositories: { - url: "https://github.com/KHKHG/yolact" - owner: "KHKHG" + url: "https://github.com/DataXujing/yolact_pytorch" + owner: "DataXujing" framework: FRAMEWORK_PYTORCH + number_of_stars: 36 + description: ":fire: :fire: :fire:Train Your Own DataSet for YOLACT and YOLACT++ Instance Segmentation Model!!!" 
} repositories: { - url: "https://github.com/hampen2929/yolact" - owner: "hampen2929" + url: "https://github.com/kogans1107/AR_yolact" + owner: "kogans1107" framework: FRAMEWORK_PYTORCH + number_of_stars: 2 } repositories: { - url: "https://github.com/hyunahOh/instance_segmentation" - owner: "hyunahOh" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/anshkumar/yolact" + owner: "anshkumar" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 20 + description: "Tensorflow 2.3 implementation YOLACT" } repositories: { - url: "https://github.com/thuyhoang-hvtt/emage-yolact" - owner: "thuyhoang-hvtt" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/zhengzhe97/yolactpaddle" + owner: "zhengzhe97" + framework: FRAMEWORK_OTHERS + description: "本项目是YOLACT-550++的PaddlePaddle实现, 包含模型训练, 测试, 数据集等内容。 项目主体基于PaddleDetection V0.2" } repositories: { - url: "https://github.com/DivaniMandi/myCustomDataset_yolact" - owner: "DivaniMandi" + url: "https://github.com/kaylode/Clothes-Segmentation" + owner: "kaylode" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 3 + description: "Project for studying" } methods: { name: "Global Average Pooling" @@ -36908,20 +37273,20 @@ pr_id_to_video: { authors: "Rafael A. Rivera Soto" authors: "Fanyi Xiao" authors: "Yong Jae Lee" + repositories: { + url: "https://github.com/fanzhemeng/yolact_edge-myver" + owner: "fanzhemeng" + framework: FRAMEWORK_PYTORCH + number_of_stars: 4 + } repositories: { is_official: true url: "https://github.com/haotian-liu/yolact_edge" owner: "haotian-liu" framework: FRAMEWORK_PYTORCH - number_of_stars: 815 + number_of_stars: 855 description: "The first competitive instance segmentation approach that runs on small edge devices at real-time speeds." } - repositories: { - url: "https://github.com/fanzhemeng/yolact_edge-myver" - owner: "fanzhemeng" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - } } papers: { paper_id: "assessing-yolact-for-real-time-and-robust" @@ -36963,7 +37328,6 @@ pr_id_to_video: { } authors: "Eslam Mohamed" authors: "Abdelrahman Shaker" - authors: "Hazem Rashed" authors: "Ahmad El-Sallab" authors: "Mayada Hadhoud" } @@ -36977,18 +37341,6 @@ pr_id_to_video: { } authors: "Youngwan Lee" authors: "Jongyoul Park" - repositories: { - url: "https://github.com/mahdi-darvish/centermask" - owner: "mahdi-darvish" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - } - repositories: { - url: "https://github.com/suvasis/birdnet2cs231n" - owner: "suvasis" - framework: FRAMEWORK_PYTORCH - description: "birdnet+ improvement" - } repositories: { url: "https://github.com/zhuoyang125/CenterMask2" owner: "zhuoyang125" @@ -37000,7 +37352,7 @@ pr_id_to_video: { url: "https://github.com/youngwanLEE/vovnet-detectron2" owner: "youngwanLEE" framework: FRAMEWORK_PYTORCH - number_of_stars: 271 + number_of_stars: 278 description: "VoVNet backbone networks for detectron2, in CVPR 2020" } repositories: { @@ -37008,16 +37360,28 @@ pr_id_to_video: { url: "https://github.com/youngwanLEE/CenterMask" owner: "youngwanLEE" framework: FRAMEWORK_PYTORCH - number_of_stars: 676 + number_of_stars: 680 description: "CenterMask : Real-Time Anchor-Free Instance Segmentation, in CVPR 2020" } repositories: { url: "https://github.com/youngwanLEE/centermask2" owner: "youngwanLEE" framework: FRAMEWORK_PYTORCH - number_of_stars: 635 + number_of_stars: 638 description: "CenterMask : Real-time Anchor-Free Instance Segmentation, in CVPR 2020" } + repositories: { + url: "https://github.com/suvasis/birdnet2cs231n" + owner: "suvasis" + 
framework: FRAMEWORK_PYTORCH + description: "birdnet+ improvement" + } + repositories: { + url: "https://github.com/mahdi-darvish/centermask" + owner: "mahdi-darvish" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } methods: { name: "Non Maximum Suppression" full_name: "Non Maximum Suppression" @@ -37135,7 +37499,7 @@ pr_id_to_video: { video_id: "2i56S0tNUj8" video_title: "PR-280: YOLACT: Real-time Instance Segmentation" number_of_likes: 16 - number_of_views: 793 + number_of_views: 857 published_date: { seconds: 1603930883 } @@ -37168,74 +37532,72 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Neil Houlsby" repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + url: "https://github.com/KatherLab/HIA" + owner: "KatherLab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Histopathology Image Analysis" } repositories: { - is_official: true - url: "https://github.com/google-research/vision_transformer" - owner: "google-research" - framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + url: "https://github.com/protonx-engineering/vit" + owner: "protonx-engineering" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Our implementation for paper: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" } repositories: { - url: "https://github.com/wangguanan/light-reid" - owner: "wangguanan" + url: "https://github.com/YousefGamal220/Vision-Transformers" + owner: "YousefGamal220" framework: FRAMEWORK_PYTORCH - number_of_stars: 333 - description: "[ECCV2020] a toolbox of light-reid learning for faster inference, speed both feature extraction and retrieval stages up to >30x" + number_of_stars: 4 + description: "A PyTorch Implementation of Vision-Transformers to classify the classes of CIFAR-100 dataset, the model implemented from the paper: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" } repositories: { - url: "https://github.com/nachiket273/VisTrans" - owner: "nachiket273" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Implementations of transformers based models for different vision tasks" + url: "https://github.com/woctezuma/steam-CLIP" + owner: "woctezuma" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "Retrieve Steam games with similar store banners, with OpenAI's CLIP." 
} repositories: { - url: "https://github.com/quanmario0311/ViT_PyTorch" - owner: "quanmario0311" + url: "https://github.com/gnoses/ViT_examples" + owner: "gnoses" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "A PyTorch Implementation of ViT (Vision Transformer)" + number_of_stars: 1 } repositories: { - url: "https://github.com/purbayankar/Hyperspectral-Vision-Transformer" - owner: "purbayankar" + url: "https://github.com/nachiket273/Vision_transformer_pytorch" + owner: "nachiket273" framework: FRAMEWORK_PYTORCH number_of_stars: 6 - description: "A PyTorch implementation of CNN+Vision Transformer for hyperspectral image classification" + description: "Simple Implementation of Vision Transformer (https://openreview.net/pdf?id=YicbFdNTTy)" } repositories: { - url: "https://github.com/asarigun/TransGAN" - owner: "asarigun" + url: "https://github.com/tahmid0007/VisionTransformer" + owner: "tahmid0007" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "This is re-implementation of TransGAN in PyTorch." + number_of_stars: 65 + description: "A complete easy to follow implementation of Google's Vision Transformer proposed in \"AN IMAGE IS WORTH 16X16 WORDS\". This pytorch implementation has comments for better understanding." } repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" + url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/vision_models/vision_transformer" + owner: "vision_models" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + number_of_stars: 3 + description: "ai codebase" } repositories: { - url: "https://github.com/04RR/SOTA-Vision" - owner: "04RR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of various state of the art architectures used in computer vision. " + url: "https://github.com/sayannath/ViT-Image-Classification" + owner: "sayannath" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "Image Classification with Vision Transformer - Keras" } repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/vision_models/vision_transformer" - owner: "vision_models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "ai codebase" + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } methods: { name: "Residual Connection" @@ -37285,7 +37647,7 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." 
} } papers: { @@ -37306,7 +37668,7 @@ pr_id_to_video: { url: "https://github.com/blackfeather-wang/Dynamic-Vision-Transformer" owner: "blackfeather-wang" framework: FRAMEWORK_PYTORCH - number_of_stars: 93 + number_of_stars: 94 description: "Accelerating T2t-ViT by 1.6-3.6x." } methods: { @@ -37371,21 +37733,21 @@ pr_id_to_video: { authors: "Gilad Sharir" authors: "Asaf Noy" authors: "Lihi Zelnik-Manor" + repositories: { + url: "https://github.com/lucidrains/STAM-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 86 + description: "Implementation of STAM (Space Time Attention Model), a pure and simple attention model that reaches SOTA for video classification" + } repositories: { is_official: true url: "https://github.com/Alibaba-MIIL/STAM" owner: "Alibaba-MIIL" framework: FRAMEWORK_PYTORCH - number_of_stars: 161 + number_of_stars: 166 description: "Official implementation of \"An Image is Worth 16x16 Words, What is a Video Worth?\" (2021 paper) " } - repositories: { - url: "https://github.com/lucidrains/STAM-pytorch" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 85 - description: "Implementation of STAM (Space Time Attention Model), a pure and simple attention model that reaches SOTA for video classification" - } methods: { name: "Convolution" full_name: "Convolution" @@ -37421,31 +37783,31 @@ pr_id_to_video: { url: "https://github.com/huawei-noah/CV-backbones" owner: "huawei-noah" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1526 + number_of_stars: 1559 description: "CV backbones including GhostNet, TinyNet and TNT." } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } repositories: { is_official: true url: "https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch" owner: "master" framework: FRAMEWORK_PYTORCH - number_of_stars: 1526 + number_of_stars: 1559 description: "CV backbones including GhostNet, TinyNet and TNT." 
} repositories: { url: "https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/tnt.py" owner: "models" framework: FRAMEWORK_PYTORCH - number_of_stars: 11580 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } + repositories: { + url: "https://github.com/lucidrains/transformer-in-transformer" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 240 + description: "Implementation of Transformer in Transformer, pixel level attention paired with patch level attention for image classification, in Pytorch" + } repositories: { url: "https://github.com/NZ99/transformer_in_transformer_flax" owner: "NZ99" @@ -37453,11 +37815,11 @@ pr_id_to_video: { number_of_stars: 20 } repositories: { - url: "https://github.com/lucidrains/transformer-in-transformer" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 234 - description: "Implementation of Transformer in Transformer, pixel level attention paired with patch level attention for image classification, in Pytorch" + url: "https://github.com/PaddlePaddle/PaddleClas" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2166 + description: "A treasure chest for visual recognition powered by PaddlePaddle" } methods: { name: "Attention Dropout" @@ -37547,48 +37909,18 @@ pr_id_to_video: { url: "https://github.com/lukemelas/do-you-even-need-attention" owner: "lukemelas" framework: FRAMEWORK_PYTORCH - number_of_stars: 427 + number_of_stars: 430 description: "Exploring whether attention is necessary for vision transformers" } - methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" - } - methods: { - name: "Feedforward Network" - full_name: "Feedforward Network" - description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "DeiT" - full_name: "Data-efficient Image Transformer" - description: "A **Data-Efficient Image Transformer** is a type of Vision Transformer for image classification tasks. The model is trained using a teacher-student strategy specific to transformers. It relies on a distillation token ensuring that the student learns from the teacher through attention." - } methods: { name: "Softmax" full_name: "Softmax" description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { - name: "Vision Transformer" - full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" } methods: { name: "Dropout" @@ -37600,7 +37932,7 @@ pr_id_to_video: { paper_id: "visual-transformer-pruning" title: "Visual Transformer Pruning" arxiv_id: "2104.08500" - abstract: "Visual transformer has achieved competitive performance on a variety of computer vision applications. However, their storage, run-time memory, and computational demands are hindering the deployment on mobile devices. Here we present an visual transformer pruning approach, which identifies the impacts of channels in each layer and then executes pruning accordingly. By encouraging channel-wise sparsity in the Transformer, important channels automatically emerge. A great number of channels with small coefficients can be discarded to achieve a high pruning ratio without significantly compromising accuracy. The pipeline for visual transformer pruning is as follows: 1) training with sparsity regularization; 2) pruning channels; 3) finetuning. The reduced parameters and FLOPs ratios of the proposed algorithm are well evaluated and analyzed on ImageNet dataset to demonstrate its effectiveness." + abstract: "Vision transformer has achieved competitive performance on a variety of computer vision applications. However, their storage, run-time memory, and computational demands are hindering the deployment to mobile devices. Here we present a vision transformer pruning approach, which identifies the impacts of dimensions in each layer of transformer and then executes pruning accordingly. By encouraging dimension-wise sparsity in the transformer, important dimensions automatically emerge. A great number of dimensions with small importance scores can be discarded to achieve a high pruning ratio without significantly compromising accuracy. The pipeline for vision transformer pruning is as follows: 1) training with sparsity regularization; 2) pruning dimensions of linear projections; 3) fine-tuning. The reduced parameters and FLOPs ratios of the proposed algorithm are well evaluated and analyzed on ImageNet dataset to demonstrate the effectiveness of our proposed method." 
published_date: { seconds: 1618617600 } @@ -37643,6 +37975,11 @@ pr_id_to_video: { full_name: "Softmax" description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } + methods: { + name: "Vision Transformer" + full_name: "Vision Transformer" + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." + } methods: { name: "Adam" full_name: "Adam" @@ -37653,17 +37990,12 @@ pr_id_to_video: { full_name: "Multi-Head Attention" description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } } video: { video_id: "D72_Cn-XV1g" video_title: "PR-281: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" - number_of_likes: 66 - number_of_views: 4070 + number_of_likes: 73 + number_of_views: 4463 published_date: { seconds: 1603685088 } @@ -37713,6 +38045,11 @@ pr_id_to_video: { authors: "Jianwei Zhang" authors: "Jingren Zhou" authors: "Hongxia Yang" + methods: { + name: "RUN" + full_name: "Runge Kutta optimization" + description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers.
Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely well-known in mathematics. The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization" + } } papers: { paper_id: "disentangling-sampling-and-labeling-bias-for" @@ -37800,24 +38137,6 @@ pr_id_to_video: { authors: "Lina Yao" authors: "Aixin Sun" authors: "Yi Tay" - repositories: { - url: "https://github.com/philippe-gagne/treasure-boxd" - owner: "philippe-gagne" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Treasure-boxd is a tool that predicts movie preferences and provides film recommendations based on users' Letterboxd data." - } - repositories: { - url: "https://github.com/YichenLin/MATH-80600A-Project" - owner: "YichenLin" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/abmitra84/recommender_system" - owner: "abmitra84" - framework: FRAMEWORK_OTHERS - number_of_stars: 1 - } repositories: { url: "https://github.com/anuragreddygv323/Important-stuff" owner: "anuragreddygv323" @@ -37851,6 +38170,24 @@ pr_id_to_video: { framework: FRAMEWORK_TENSORFLOW description: "Collection of codes and papers in the topic of recommender system" } + repositories: { + url: "https://github.com/abmitra84/recommender_system" + owner: "abmitra84" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/YichenLin/MATH-80600A-Project" + owner: "YichenLin" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/philippe-gagne/treasure-boxd" + owner: "philippe-gagne" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + description: "Treasure-boxd is a tool that predicts movie preferences and provides film recommendations based on users' Letterboxd data." 
+ } } papers: { paper_id: "a-model-of-two-tales-dual-transfer-learning" @@ -37883,7 +38220,7 @@ pr_id_to_video: { video_id: "FSDuo9ybv8s" video_title: "PR-282: Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations" number_of_likes: 3 - number_of_views: 436 + number_of_views: 445 published_date: { seconds: 1604242257 } @@ -37946,7 +38283,7 @@ pr_id_to_video: { url: "https://github.com/inesylla/energy-disaggregation-DL" owner: "inesylla" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 3 } } papers: { @@ -37971,7 +38308,7 @@ pr_id_to_video: { video_id: "c5XtV-CkzM0" video_title: "PR-283: Herring: Rethinking the Parameter Server at Scale for the Cloud" number_of_likes: 3 - number_of_views: 449 + number_of_views: 451 published_date: { seconds: 1604239098 } @@ -38016,7 +38353,7 @@ pr_id_to_video: { paper_id: "semantic-models-for-the-first-stage-retrieval" title: "Semantic Models for the First-stage Retrieval: A Comprehensive Review" arxiv_id: "2103.04831" - abstract: "Multi-stage ranking pipelines have been a practical solution in modern search systems, where the first-stage retrieval is to return a subset of candidate documents, and the latter stages attempt to re-rank those candidates. Unlike the re-ranking stages going through quick technique shifts during the past decades, the first-stage retrieval has long been dominated by classical term-based models. Unfortunately, these models suffer from the vocabulary mismatch problem, which may block the re-ranking stages from relevant documents at the very beginning. Therefore, it has been a long-term desire to build semantic models for the first-stage retrieval that can achieve high recall efficiently. Recently, we have witnessed an explosive growth of research interests on the first-stage semantic retrieval models. We believe it is the right time to survey the current status, learn from existing methods, and gain some insights for future development. In this paper, we describe the current landscape of semantic retrieval models from three major paradigms, paying special attention to recent neural-based methods. We review the benchmark datasets, optimization methods and evaluation metrics, and summarize the state-of-the-art models. We also discuss the unresolved challenges and suggest potentially promising directions for future work." + abstract: "Multi-stage ranking pipelines have been a practical solution in modern search systems, where the first-stage retrieval is to return a subset of candidate documents, and latter stages attempt to re-rank those candidates. Unlike re-ranking stages going through quick technique shifts during past decades, the first-stage retrieval has long been dominated by classical term-based models. Unfortunately, these models suffer from the vocabulary mismatch problem, which may block re-ranking stages from relevant documents at the very beginning. Therefore, it has been a long-term desire to build semantic models for the first-stage retrieval that can achieve high recall efficiently. Recently, we have witnessed an explosive growth of research interests on the first-stage semantic retrieval models. We believe it is the right time to survey current status, learn from existing methods, and gain some insights for future development. 
In this paper, we describe the current landscape of the first-stage retrieval models under a unified framework to clarify the connection between classical term-based retrieval methods, early semantic retrieval methods and neural semantic retrieval methods. Moreover, we identify some open challenges and envision some future directions, with the hope of inspiring more researches on these important yet less investigated topics." published_date: { seconds: 1615161600 } @@ -38092,9 +38429,9 @@ pr_id_to_video: { } papers: { paper_id: "rethinking-search-making-experts-out-of" - title: "Rethinking Search: Making Experts out of Dilettantes" + title: "Rethinking Search: Making Domain Experts out of Dilettantes" arxiv_id: "2105.02274" - abstract: "When experiencing an information need, users want to engage with an expert, but often turn to an information retrieval system, such as a search engine, instead. Classical information retrieval systems do not answer information needs directly, but instead provide references to (hopefully authoritative) answers. Successful question answering systems offer a limited corpus created on-demand by human experts, which is neither timely nor scalable. Large pre-trained language models, by contrast, are capable of directly generating prose that may be responsive to an information need, but at present they are dilettantes rather than experts - they do not have a true understanding of the world, they are prone to hallucinating, and crucially they are incapable of justifying their utterances by referring to supporting documents in the corpus they were trained over. This paper examines how ideas from classical information retrieval and large pre-trained language models can be synthesized and evolved into systems that truly deliver on the promise of expert advice." + abstract: "When experiencing an information need, users want to engage with a domain expert, but often turn to an information retrieval system, such as a search engine, instead. Classical information retrieval systems do not answer information needs directly, but instead provide references to (hopefully authoritative) answers. Successful question answering systems offer a limited corpus created on-demand by human experts, which is neither timely nor scalable. Pre-trained language models, by contrast, are capable of directly generating prose that may be responsive to an information need, but at present they are dilettantes rather than domain experts -- they do not have a true understanding of the world, they are prone to hallucinating, and crucially they are incapable of justifying their utterances by referring to supporting documents in the corpus they were trained over. This paper examines how ideas from classical information retrieval and pre-trained language models can be synthesized and evolved into systems that truly deliver on the promise of domain expert advice." 
published_date: { seconds: 1620172800 } @@ -38172,7 +38509,7 @@ pr_id_to_video: { url: "https://github.com/luyug/COIL" owner: "luyug" framework: FRAMEWORK_PYTORCH - number_of_stars: 25 + number_of_stars: 43 description: "NAACL2021 - COIL Contextualized Lexical Retriever " } } @@ -38243,7 +38580,7 @@ pr_id_to_video: { video_id: "QfkcN4SZ1Po" video_title: "PR-285: Leveraging Semantic and Lexical Matching to Improve the Recall of Document Retrieval Systems" number_of_likes: 9 - number_of_views: 296 + number_of_views: 298 published_date: { seconds: 1604938502 } @@ -38274,7 +38611,7 @@ pr_id_to_video: { url: "https://github.com/mahmoodlab/CLAM" owner: "mahmoodlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 182 + number_of_stars: 200 description: "Data-efficient and weakly supervised computational pathology on whole slide images - Nature Biomedical Engineering" } } @@ -38310,7 +38647,7 @@ pr_id_to_video: { url: "https://github.com/mahmoodlab/TOAD" owner: "mahmoodlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 76 + number_of_stars: 83 description: "AI-based pathology predicts origins for cancers of unknown primary - Nature" } } @@ -38333,7 +38670,7 @@ pr_id_to_video: { url: "https://github.com/YashSharma/C2C" owner: "YashSharma" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 7 description: "Implementation of Cluster-to-Conquer: A Framework for End-to-End Multi-Instance Learning for Whole Slide Image Classification approach." } } @@ -38373,7 +38710,7 @@ pr_id_to_video: { url: "https://github.com/DIAGNijmegen/pathology-streaming-pipeline" owner: "DIAGNijmegen" framework: FRAMEWORK_PYTORCH - number_of_stars: 32 + number_of_stars: 34 description: "Use streaming to train whole-slides images with single image-level labels, by reducing GPU memory requirements with 99%." } } @@ -38442,6 +38779,13 @@ pr_id_to_video: { authors: "Drew F. K. Williamson" authors: "Tiffany Y. Chen" authors: "Faisal Mahmood" + repositories: { + url: "https://github.com/mahmoodlab/HistoFL" + owner: "mahmoodlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Federated Learning for Computational Pathology" + } } papers: { paper_id: "self-similarity-student-for-partial-label" @@ -38493,7 +38837,7 @@ pr_id_to_video: { video_id: "JqiTHkWR-PU" video_title: "PR-286: Clinical-grade computational pathology using weakly supervised deep learning on WSIs" number_of_likes: 8 - number_of_views: 119 + number_of_views: 134 published_date: { seconds: 1611540672 } @@ -38521,7 +38865,7 @@ pr_id_to_video: { url: "https://github.com/wichmann-lab/error-consistency" owner: "wichmann-lab" framework: FRAMEWORK_OTHERS - number_of_stars: 3 + number_of_stars: 4 description: "Error consistency: a black-box analysis for comparing errors between decision makers (NeurIPS 2020)" } } @@ -38542,8 +38886,8 @@ pr_id_to_video: { url: "https://github.com/shikhartuli/cnn_txf_bias" owner: "shikhartuli" framework: FRAMEWORK_OTHERS - number_of_stars: 20 - description: "Study of human inductive biases in CNNs and Transformers." + number_of_stars: 26 + description: "[CogSci'21] Study of human inductive biases in CNNs and Transformers." } methods: { name: "Residual Connection" @@ -38583,7 +38927,7 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." 
+ description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." } methods: { name: "Multi-Head Attention" @@ -38626,12 +38970,20 @@ pr_id_to_video: { authors: "Matthias Bethge" authors: "Felix A. Wichmann" authors: "Wieland Brendel" + repositories: { + is_official: true + url: "https://github.com/bethgelab/model-vs-human" + owner: "bethgelab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 63 + description: "Benchmark your model on out-of-distribution datasets with carefully collected human comparison data " + } } papers: { paper_id: "contrast-sensitivity-functions-in" title: "Contrast Sensitivity Functions in Autoencoders" arxiv_id: "2103.00481" - abstract: "Artificial neural networks (ANNs) trained to solve low-level visual tasks have been shown to develop biological-like features. Examples include human-like receptive fields and being subject to human-like visual illusions. In this work we report that these artificial systems may also develop human-like Contrast Sensitivity Functions (CSFs). Following the interest in accurate comparison between humans and ANNs, we use psychophysical ideas to extend a recently proposed eigenanalysis to define the CSFs of such artificial systems. Results show that low-pass sensitivity to chromatic gratings and wider band-pass sensitivity to achromatic gratings may appear in convolutional autoencoders trained to enhance the retinal signals. Similarly, 3D versions of these networks may develop a spatio-temporal CSF with the basic diamond shape and bandwidth of the human window of visibility. Moreover, masking-like saturated responses to the contrast of spatio-chromatic and spatio-temporal gratings may emerge in these autoencoders. However, the consideration of a range of ANN models trained for the same statistical goal shows that deeper architectures or better accuracy in the goal do not necessarily lead to more human CSFs or more human masking-like saturations. We argue that the nontrivial interplay between goal and architecture in the optimization prevents against premature conclusions about the organization principles of the visual system if sensible architectures are not adopted. This suggests that goal-driven derivations of psychophysical phenomena should include more realistic units than those considered in conventional ANNs." + abstract: "Three decades ago, Atick et al. suggested that human frequency sensitivity may emerge from the enhancement required for a more efficient analysis of retinal images. Here we reassess the relevance of low-level vision tasks in the explanation of the Contrast Sensitivity Functions (CSFs) in light of (1) the current trend of using artificial neural networks for studying vision, and (2) the current knowledge of retinal image representations. 
As a first contribution, we show that a very popular type of convolutional neural networks (CNNs), called autoencoders, may develop human-like CSFs in the spatio-temporal and chromatic dimensions when trained to perform some basic low-level vision tasks (like retinal noise and optical blur removal), but not others (like chromatic adaptation). As a second contribution, we provide experimental evidence of the fact that, for some functional goals (at low abstraction level), deeper CNNs that are better in reaching the quantitative goal are actually worse in replicating human-like phenomena (such as the CSFs). This low-level result is not necessarily in contradiction with other works that report advantages of deeper nets in modeling higher-level vision goals. However, in line with a growing body of literature, our results suggests another word of caution about CNNs since the use of simplified units or unrealistic architectures in goal optimization may be a limitation for the modeling and understanding of human vision." published_date: { seconds: 1614470400 } @@ -38639,6 +38991,11 @@ pr_id_to_video: { authors: "Alex Gomez-Villa" authors: "Marcelo Bertalmio" authors: "Jesus Malo" + methods: { + name: "LINE" + full_name: "Large-scale Information Network Embedding" + description: "LINE is a novel network embedding method which is suitable for arbitrary types of information networks: undirected, directed, and/or weighted. The method optimizes a carefully designed objective function that preserves both the local and global network structures.\r\n\r\nSource: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)\r\n\r\nImage source: [Tang et al.](https://arxiv.org/pdf/1503.03578v1.pdf)" + } } papers: { paper_id: "deep-neural-models-for-color-discrimination" @@ -38695,7 +39052,7 @@ pr_id_to_video: { is_official: true url: "https://github.com/rgeirhos/object-recognition" owner: "rgeirhos" - framework: FRAMEWORK_OTHERS + framework: FRAMEWORK_TENSORFLOW number_of_stars: 30 description: "Data and materials from the paper \"Comparing deep neural networks against humans: object recognition when the signal gets weaker\" (arXiv 2017)" } @@ -38704,7 +39061,7 @@ pr_id_to_video: { video_id: "NdB_dFycXbM" video_title: "PR-287: Quantifying Behaviour of CNNs and Humans by Measuring Error Consistency" number_of_likes: 3 - number_of_views: 358 + number_of_views: 365 published_date: { seconds: 1605448482 } @@ -38728,6 +39085,14 @@ pr_id_to_video: { authors: "Giorgos Tolias" authors: "Yannis Avrithis" authors: "Ondrej Chum" + repositories: { + is_official: true + url: "https://github.com/ahmetius/LP-DeepSSL" + owner: "ahmetius" + framework: FRAMEWORK_PYTORCH + number_of_stars: 70 + description: "Code for CVPR 2019 paper Label Propagation for Deep Semi-supervised Learning" + } } papers: { paper_id: "local-label-propagation-for-large-scale-semi" @@ -38820,7 +39185,7 @@ pr_id_to_video: { video_id: "coezwQw6my0" video_title: "PR-288: Label Propagation for Deep Semi-supervised Learning" number_of_likes: 16 - number_of_views: 746 + number_of_views: 787 published_date: { seconds: 1606053375 } @@ -38854,6 +39219,11 @@ pr_id_to_video: { full_name: "Scaled Dot-Product Attention" description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. 
Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } methods: { name: "Softmax" full_name: "Softmax" @@ -38862,7 +39232,12 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." + } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. 
Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } methods: { name: "Dense Connections" @@ -38897,22 +39272,6 @@ pr_id_to_video: { } authors: "Mei Wang" authors: "Weihong Deng" - repositories: { - url: "https://github.com/altomator/Introduction_to_Deep_Learning-2-Face_Detection" - owner: "altomator" - framework: FRAMEWORK_TENSORFLOW - description: "Face detection with OpenCV and Google Cloud Vision" - } - repositories: { - url: "https://github.com/EvgenyDyshlyuk/DeepLearning_face_detection_embeddings_clustering" - owner: "EvgenyDyshlyuk" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/EvgenyDyshlyuk/Face_detection_embeddings_clustering" - owner: "EvgenyDyshlyuk" - framework: FRAMEWORK_OTHERS - } repositories: { url: "https://github.com/parvatijay2901/FaceNet_FR" owner: "parvatijay2901" @@ -38933,12 +39292,28 @@ pr_id_to_video: { number_of_stars: 6 description: "Implementation of a Siamese Neural Network (in Tensorflow) that defines a similarity score between a pair of person images." } + repositories: { + url: "https://github.com/EvgenyDyshlyuk/Face_detection_embeddings_clustering" + owner: "EvgenyDyshlyuk" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/EvgenyDyshlyuk/DeepLearning_face_detection_embeddings_clustering" + owner: "EvgenyDyshlyuk" + framework: FRAMEWORK_OTHERS + } + repositories: { + url: "https://github.com/altomator/Introduction_to_Deep_Learning-2-Face_Detection" + owner: "altomator" + framework: FRAMEWORK_TENSORFLOW + description: "Face detection with OpenCV and Google Cloud Vision" + } } papers: { paper_id: "domain-generalization-a-survey" - title: "Domain Generalization: A Survey" + title: "Domain Generalization in Vision: A Survey" arxiv_id: "2103.02503" - abstract: "Generalization to out-of-distribution (OOD) data is a capability natural to humans yet challenging for machines to reproduce. This is because most statistical learning algorithms strongly rely on the i.i.d.~assumption on source/target data, while in practice domain shift between source and target is common. Domain generalization (DG) aims to achieve OOD generalization by using only source data for model learning. Since first introduced in 2011, research in DG has made great progresses. In particular, intensive research in this topic has led to a broad spectrum of methodologies, e.g., those based on domain alignment, meta-learning, data augmentation, or ensemble learning, just to name a few; and has covered various applications such as object recognition, segmentation, action recognition, and person re-identification. In this paper, for the first time, a comprehensive literature review is provided to summarize the developments in DG in the past decade. Specifically, we first cover the background by formally defining DG and relating it to other research fields like domain adaptation and transfer learning. Second, we conduct a thorough review into existing methods and present a categorization based on their methodologies and motivations. Finally, we conclude this survey with insights and discussions on future research directions." + abstract: "Generalization to out-of-distribution (OOD) data is a capability natural to humans yet challenging for machines to reproduce. This is because most learning algorithms strongly rely on the i.i.d.~assumption on source/target data, which is often violated in practice due to domain shift. 
Domain generalization (DG) aims to achieve OOD generalization by using only source data for model learning. Since first introduced in 2011, research in DG has made great progresses. In particular, intensive research in this topic has led to a broad spectrum of methodologies, e.g., those based on domain alignment, meta-learning, data augmentation, or ensemble learning, just to name a few; and has covered various vision applications such as object recognition, segmentation, action recognition, and person re-identification. In this paper, for the first time a comprehensive literature review is provided to summarize the developments in DG for computer vision over the past decade. Specifically, we first cover the background by formally defining DG and relating it to other research fields like domain adaptation and transfer learning. Second, we conduct a thorough review into existing methods and present a categorization based on their methodologies and motivations. Finally, we conclude this survey with insights and discussions on future research directions." published_date: { seconds: 1614729600 } @@ -38951,7 +39326,7 @@ pr_id_to_video: { url: "https://github.com/KaiyangZhou/mixstyle-release" owner: "KaiyangZhou" framework: FRAMEWORK_PYTORCH - number_of_stars: 94 + number_of_stars: 104 description: "Domain Generalization with MixStyle. ICLR'21." } } @@ -38983,19 +39358,19 @@ pr_id_to_video: { authors: "Zhen Lei" authors: "Stan Z. Li" repositories: { - is_official: true - url: "https://github.com/cleardusk/3DDFA_V2" + url: "https://github.com/cleardusk/3DDFA" owner: "cleardusk" framework: FRAMEWORK_PYTORCH - number_of_stars: 1718 - description: "The official PyTorch implementation of Towards Fast, Accurate and Stable 3D Dense Face Alignment, ECCV 2020." + number_of_stars: 3054 + description: "The PyTorch improved version of TPAMI 2017 paper: Face Alignment in Full Pose Range: A 3D Total Solution." } repositories: { - url: "https://github.com/cleardusk/3DDFA" + is_official: true + url: "https://github.com/cleardusk/3DDFA_V2" owner: "cleardusk" framework: FRAMEWORK_PYTORCH - number_of_stars: 3029 - description: "The PyTorch improved version of TPAMI 2017 paper: Face Alignment in Full Pose Range: A 3D Total Solution." + number_of_stars: 1771 + description: "The official PyTorch implementation of Towards Fast, Accurate and Stable 3D Dense Face Alignment, ECCV 2020." } methods: { name: "Average Pooling" @@ -39066,14 +39441,14 @@ pr_id_to_video: { url: "https://github.com/JStehouwer/FFD_CVPR2020" owner: "JStehouwer" framework: FRAMEWORK_PYTORCH - number_of_stars: 59 + number_of_stars: 61 } } video: { video_id: "zCiq8rPhzr4" video_title: "PR-289: On the Effectiveness of Vision Transformers for Zero-shot Face Anti-Spoofing" number_of_likes: 2 - number_of_views: 180 + number_of_views: 186 published_date: { seconds: 1607871942 } @@ -39098,19 +39473,12 @@ pr_id_to_video: { authors: "Logan Engstrom" authors: "Ashish Kapoor" authors: "Aleksander Madry" - repositories: { - url: "https://github.com/lengstrom/gitlinks" - owner: "lengstrom" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 - description: "gitlinks - Git Powered Go-Links! 👴⛓" - } repositories: { is_official: true url: "https://github.com/MadryLab/robustness" owner: "MadryLab" framework: FRAMEWORK_PYTORCH - number_of_stars: 559 + number_of_stars: 572 description: "A library for experimenting with, training and evaluating neural networks, with a focus on adversarial robustness." 
} repositories: { @@ -39118,9 +39486,16 @@ pr_id_to_video: { url: "https://github.com/Microsoft/robust-models-transfer" owner: "Microsoft" framework: FRAMEWORK_PYTORCH - number_of_stars: 166 + number_of_stars: 167 description: "Official repository for our NeurIPS 2020 *oral* \"Do Adversarially Robust ImageNet Models Transfer Better?\"" } + repositories: { + url: "https://github.com/lengstrom/gitlinks" + owner: "lengstrom" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "gitlinks - Git Powered Go-Links! 👴⛓" + } } papers: { paper_id: "cartl-cooperative-adversarially-robust" @@ -39142,6 +39517,7 @@ pr_id_to_video: { url: "https://github.com/NISP-official/CARTL" owner: "NISP-official" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } methods: { name: "Batch Normalization" @@ -39246,7 +39622,7 @@ pr_id_to_video: { video_id: "x2L393xcL2M" video_title: "PR-290: Do Adversarially Robust ImageNet Models Transfer Better?" number_of_likes: 14 - number_of_views: 779 + number_of_views: 797 published_date: { seconds: 1606660534 } @@ -39272,11 +39648,25 @@ pr_id_to_video: { authors: "Zhen Lei" authors: "Stan Z. Li" repositories: { - url: "https://github.com/Qengineering/NanoDet-ncnn-Raspberry-Pi-4" - owner: "Qengineering" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - description: "NanoDet for a bare Raspberry Pi 4 " + url: "https://github.com/TangShengqin/ATSS-Pytorch" + owner: "TangShengqin" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection" + } + repositories: { + url: "https://github.com/open-mmlab/mmdetection" + owner: "open-mmlab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 16041 + description: "OpenMMLab Detection Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/justimyhxu/Dense-RepPoints" + owner: "justimyhxu" + framework: FRAMEWORK_PYTORCH + number_of_stars: 119 + description: "Dense reppoints: Representing visual objects with dense point sets https://arxiv.org/abs/1912.11473" } repositories: { url: "https://github.com/Qengineering/NanoDet-ncnn-Jetson-Nano" @@ -39290,29 +39680,29 @@ pr_id_to_video: { url: "https://github.com/sfzhang15/ATSS" owner: "sfzhang15" framework: FRAMEWORK_PYTORCH - number_of_stars: 891 + number_of_stars: 903 description: "Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection, CVPR, Oral, 2020" } repositories: { - url: "https://github.com/TangShengqin/ATSS-Pytorch" - owner: "TangShengqin" - framework: FRAMEWORK_PYTORCH - number_of_stars: 10 - description: "Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection" + url: "https://github.com/Qengineering/NanoDet-ncnn-Raspberry-Pi-4" + owner: "Qengineering" + framework: FRAMEWORK_OTHERS + number_of_stars: 3 + description: "NanoDet for a bare Raspberry Pi 4 " } repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" + url: "https://github.com/Yuxiang1995/ICDAR2021_MFD" + owner: "Yuxiang1995" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" + number_of_stars: 60 + description: "1st Solution For ICDAR 2021 Competition on Mathematical Formula Detection(公式检测冠军方案)" } repositories: { - url: "https://github.com/justimyhxu/Dense-RepPoints" - owner: "justimyhxu" + url: "https://github.com/RangiLyu/nanodet" + owner: "RangiLyu" framework: FRAMEWORK_PYTORCH - number_of_stars: 
120 - description: "Dense reppoints: Representing visual objects with dense point sets https://arxiv.org/abs/1912.11473" + number_of_stars: 3166 + description: "⚡Super fast and lightweight anchor-free object detection model. 🔥Only 980 KB(int8) / 1.8MB (fp16) and run 97FPS on cellphone🔥" } methods: { name: "Bottleneck Residual Block" @@ -39384,8 +39774,8 @@ pr_id_to_video: { is_official: true url: "https://github.com/SHI-Labs/Pseudo-IoU-for-Anchor-Free-Object-Detection" owner: "SHI-Labs" - framework: FRAMEWORK_OTHERS - number_of_stars: 12 + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 description: "Pseudo-IoU: Improving Label Assignment in Anchor-Free Object Detection" } } @@ -39400,33 +39790,33 @@ pr_id_to_video: { authors: "Kang Kim" authors: "Hee Seok Lee" repositories: { - url: "https://github.com/kkhoot/PAA_Faster-RCNN" - owner: "kkhoot" + url: "https://github.com/open-mmlab/mmdetection" + owner: "open-mmlab" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "An implementation of PAA (Probabilistic Anchor Assignment with IoU Prediction for Object Detection) applied to Faster RCNN" + number_of_stars: 16041 + description: "OpenMMLab Detection Toolbox and Benchmark" + } + repositories: { + url: "https://github.com/feiyuhuahuo/PAA_minimal" + owner: "feiyuhuahuo" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "Minimal PyTorch implementation of ECCV2020: Probabilistic Anchor Assignment with IoU Prediction for Object Detection." } repositories: { is_official: true url: "https://github.com/kkhoot/PAA" owner: "kkhoot" framework: FRAMEWORK_PYTORCH - number_of_stars: 203 + number_of_stars: 205 description: "A PyTorch implementation of the paper `Probabilistic Anchor Assignment with IoU Prediction for Object Detection` ECCV 2020 (https://arxiv.org/abs/2007.08103)" } repositories: { - url: "https://github.com/feiyuhuahuo/PAA_minimal" - owner: "feiyuhuahuo" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Minimal PyTorch implementation of ECCV2020: Probabilistic Anchor Assignment with IoU Prediction for Object Detection." - } - repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" + url: "https://github.com/kkhoot/PAA_Faster-RCNN" + owner: "kkhoot" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" + number_of_stars: 4 + description: "An implementation of PAA (Probabilistic Anchor Assignment with IoU Prediction for Object Detection) applied to Faster RCNN" } methods: { name: "FPN" @@ -39470,21 +39860,21 @@ pr_id_to_video: { authors: "Cheng Chi" authors: "Fangyun Wei" authors: "Han Hu" - repositories: { - url: "https://github.com/shinya7y/UniverseNet" - owner: "shinya7y" - framework: FRAMEWORK_PYTORCH - number_of_stars: 200 - description: "Object detection. EfficientDet-D5 level COCO AP in 20 epochs. SOTA single-stage detector on Waymo Open Dataset." - } repositories: { is_official: true url: "https://github.com/microsoft/RelationNet2" owner: "microsoft" framework: FRAMEWORK_OTHERS - number_of_stars: 185 + number_of_stars: 187 description: "RelationNet++: Bridging Visual Representations for Object Detection via Transformer Decoder" } + repositories: { + url: "https://github.com/shinya7y/UniverseNet" + owner: "shinya7y" + framework: FRAMEWORK_PYTORCH + number_of_stars: 215 + description: "Object detection. EfficientDet-D5 level COCO AP in 20 epochs. SOTA single-stage detector on Waymo Open Dataset." 
+ } methods: { name: "FPN" full_name: "Feature Pyramid Network" @@ -39615,75 +40005,73 @@ pr_id_to_video: { authors: "Chien-Yao Wang" authors: "Hong-Yuan Mark Liao" repositories: { - url: "https://github.com/wiegehtki/zoneminder-jetson" - owner: "wiegehtki" + url: "https://github.com/Abhi-899/YOLOV4-Custom-Object-Detection" + owner: "Abhi-899" framework: FRAMEWORK_OTHERS - description: "Personen und Gesichtserkennung mit Zoneminder, OpenCV (GPU), YOLO, cuDNN und CUDA" + description: "In this project we will train the YOLOV4 network on 3 classes 'Ambulance' , 'Car' , 'Person' with the Google open image dataset and run the detection on a real video caught on a moving traffic camera" } repositories: { - url: "https://github.com/hunter10bt/DeepLearningFinalPresentation" - owner: "hunter10bt" - framework: FRAMEWORK_OTHERS - } - repositories: { - url: "https://github.com/wangermeng2021/Scaled-YOLOv4-tensorflow2" - owner: "wangermeng2021" + url: "https://github.com/Lebhoryi/keras-YOLOv3-model-set" + owner: "Lebhoryi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 24 - description: "A Tensorflow2.x implementation of Scaled-YOLOv4 as described in Scaled-YOLOv4: Scaling Cross Stage Partial Network" + number_of_stars: 1 + description: "转自https://github.com/david8862/keras-YOLOv3-model-set " } repositories: { - url: "https://github.com/FelixFu520/yolov4" - owner: "FelixFu520" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/RobotMobile/cv-deep-learning-paper-review" + owner: "RobotMobile" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 } repositories: { - url: "https://github.com/david8862/keras-YOLOv3-model-set" - owner: "david8862" + url: "https://github.com/otamajakusi/darknet-yolov4" + owner: "otamajakusi" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 494 - description: "end-to-end YOLOv4/v3/v2 object detection pipeline, implemented on tf.keras with different technologies" } repositories: { - url: "https://github.com/MEME-Phoenix/Autonomous-Driving-Cart-MEME" - owner: "MEME-Phoenix" - framework: FRAMEWORK_PYTORCH - description: "Autonomous Driving Cart, MEME" + url: "https://github.com/ccie29441/Yolo-v4-and-Yolo-v3-v2-for-Windows-and-Linux" + owner: "ccie29441" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1 } repositories: { - url: "https://github.com/Qengineering/YoloV4-ncnn-Jetson-Nano" - owner: "Qengineering" + url: "https://github.com/Dodant/ANPR-with-Yolov4" + owner: "Dodant" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "YoloV4 for Jetson Nano" + number_of_stars: 10 + description: "2020-1 CNU SW Capstone Design Project" } repositories: { - url: "https://github.com/CRIGIM/darknet" - owner: "CRIGIM" + url: "https://github.com/hhk7734/tensorflow-yolov4" + owner: "hhk7734" framework: FRAMEWORK_TENSORFLOW - description: "edited darknet" + number_of_stars: 126 + description: "YOLOv4 Implemented in Tensorflow 2." 
} repositories: { - url: "https://github.com/ayoungkang/yolov4" - owner: "ayoungkang" + url: "https://github.com/weidalin/yolov4_mixup" + owner: "weidalin" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "yolov4 data augments with mixup" } repositories: { - url: "https://github.com/taeokimeng/object-detection-yolo" - owner: "taeokimeng" + url: "https://github.com/Qengineering/YoloV4-ncnn-Raspberry-Pi-64-bit" + owner: "Qengineering" framework: FRAMEWORK_OTHERS - number_of_stars: 1 - description: "Object Detection with YOLO and Streamlit" + number_of_stars: 25 + description: "YoloV4 on a bare Raspberry Pi 4 with ncnn framework" } - methods: { - name: "Pointwise Convolution" - full_name: "Pointwise Convolution" - description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + repositories: { + url: "https://github.com/RunzhaoHuang/DeepSort_YOLOV5_OnScreen" + owner: "RunzhaoHuang" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } methods: { - name: "Spatial Attention Module" - full_name: "Spatial Attention Module" - description: "A **Spatial Attention Module** is a module for spatial attention in convolutional neural networks. It generates a spatial attention map by utilizing the inter-spatial relationship of features. Different from the [channel attention](https://paperswithcode.com/method/channel-attention-module), the spatial attention focuses on where is an informative part, which is complementary to the channel attention. To compute the spatial attention, we first apply average-pooling and max-pooling operations along the channel axis and concatenate them to generate an efficient feature descriptor. On the concatenated feature descriptor, we apply a convolution layer to generate a spatial attention map $\\textbf{M}\\_{s}\\left(F\\right) \\in \\mathcal{R}^{H×W}$ which encodes where to emphasize or suppress. \r\n\r\nWe aggregate channel information of a feature map by using two pooling operations, generating two 2D maps: $\\mathbf{F}^{s}\\_{avg} \\in \\mathbb{R}^{1\\times{H}\\times{W}}$ and $\\mathbf{F}^{s}\\_{max} \\in \\mathbb{R}^{1\\times{H}\\times{W}}$. Each denotes average-pooled features and max-pooled features across the channel. Those are then concatenated and convolved by a standard convolution layer, producing the 2D spatial attention map. In short, the spatial attention is computed as:\r\n\r\n$$ \\textbf{M}\\_{s}\\left(F\\right) = \\sigma\\left(f^{7x7}\\left(\\left[\\text{AvgPool}\\left(F\\right);\\text{MaxPool}\\left(F\\right)\\right]\\right)\\right) $$\r\n\r\n$$ \\textbf{M}\\_{s}\\left(F\\right) = \\sigma\\left(f^{7x7}\\left(\\left[\\mathbf{F}^{s}\\_{avg};\\mathbf{F}^{s}\\_{max} \\right]\\right)\\right) $$\r\n\r\nwhere $\\sigma$ denotes the sigmoid function and $f^{7×7}$ represents a convolution operation with the filter size of 7 × 7." 
+ name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." } methods: { name: "Bottom-up Path Augmentation" @@ -39691,19 +40079,24 @@ pr_id_to_video: { description: "**Bottom-up Path Augmentation** is a feature extraction technique that seeks to shorten the information path and enhance a feature pyramid with accurate localization signals existing in low-levels. This is based on the fact that high response to edges or instance parts is a strong indicator to accurately localize instances. \r\n\r\nEach building block takes a higher resolution feature map $N\\_{i}$ and a coarser map $P\\_{i+1}$ through lateral connection and generates the new feature map $N\\_{i+1}$ Each feature map $N\\_{i}$ first goes through a $3 \\times 3$ convolutional layer with stride $2$ to reduce the spatial size. Then each element of feature map $P\\_{i+1}$ and the down-sampled map are added through lateral connection. The fused feature map is then processed by another $3 \\times 3$ convolutional layer to generate $N\\_{i+1}$ for following sub-networks. This is an iterative process and terminates after approaching $P\\_{5}$. In these building blocks, we consistently use channel 256 of feature maps. The feature grid for each proposal is then pooled from new feature maps, i.e., {$N\\_{2}$, $N\\_{3}$, $N\\_{4}$, $N\\_{5}$}." } methods: { - name: "PAFPN" - full_name: "PAFPN" - description: "**PAFPN** is a feature pyramid module used in Path Aggregation networks ([PANet](https://paperswithcode.com/method/panet)) that combines FPNs with bottom-up path augmentation, which shortens the information path between lower layers and topmost feature." + name: "k-Means Clustering" + full_name: "k-Means Clustering" + description: "**k-Means Clustering** is a clustering algorithm that divides a training set into $k$ different clusters of examples that are near each other. It works by initializing $k$ different centroids {$\\mu\\left(1\\right),\\ldots,\\mu\\left(k\\right)$} to different values, then alternating between two steps until convergence:\r\n\r\n(i) each training example is assigned to cluster $i$ where $i$ is the index of the nearest centroid $\\mu^{(i)}$\r\n\r\n(ii) each centroid $\\mu^{(i)}$ is updated to the mean of all training examples $x^{(j)}$ assigned to cluster $i$.\r\n\r\nText Source: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [scikit-learn](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_digits.html)" + } + methods: { + name: "RoIAlign" + full_name: "RoIAlign" + description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." 
} methods: { - name: "DIoU-NMS" - full_name: "DIoU-NMS" - description: "**DIoU-NMS** is a type of non-maximum suppression where we use Distance IoU rather than regular DIoU, in which the overlap area and the distance between two central points of bounding boxes are simultaneously considered when suppressing redundant boxes.\r\n\r\nIn original NMS, the IoU metric is used to suppress the redundant detection boxes, where the overlap area is the unique factor, often yielding false suppression for the cases with occlusion. With DIoU-NMS, we not only consider the overlap area but also central point distance between two boxes." + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } methods: { - name: "Cosine Annealing" - full_name: "Cosine Annealing" - description: "**Cosine Annealing** is a type of learning rate schedule that has the effect of starting with a large learning rate that is relatively rapidly decreased to a minimum value before being increased rapidly again. The resetting of the learning rate acts like a simulated restart of the learning process and the re-use of good weights as the starting point of the restart is referred to as a \"warm restart\" in contrast to a \"cold restart\" where a new set of small random numbers may be used as a starting point.\r\n\r\n$$\\eta\\_{t} = \\eta\\_{min}^{i} + \\frac{1}{2}\\left(\\eta\\_{max}^{i}-\\eta\\_{min}^{i}\\right)\\left(1+\\cos\\left(\\frac{T\\_{cur}}{T\\_{i}}\\pi\\right)\\right)\r\n$$\r\n\r\nWhere where $\\eta\\_{min}^{i}$ and $ \\eta\\_{max}^{i}$ are ranges for the learning rate, and $T\\_{cur}$ account for how many epochs have been performed since the last restart.\r\n\r\nText Source: [Jason Brownlee](https://machinelearningmastery.com/snapshot-ensemble-deep-learning-neural-network/)\r\n\r\nImage Source: [Gao Huang](https://www.researchgate.net/figure/Training-loss-of-100-layer-DenseNet-on-CIFAR10-using-standard-learning-rate-blue-and-M_fig2_315765130)" + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { name: "Weight Decay" @@ -39711,19 +40104,19 @@ pr_id_to_video: { description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). 
\r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" } methods: { - name: "YOLOv3" - full_name: "YOLOv3" - description: "**YOLOv3** is a real-time, single-stage object detection model that builds on [YOLOv2](https://paperswithcode.com/method/yolov2) with several improvements. Improvements include the use of a new backbone network, Darknet-53 that utilises residual connections, or in the words of the author, \"those newfangled residual network stuff\", as well as some improvements to the bounding box prediction step, and use of three different scales from which to extract features (similar to an FPN)." + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "RoIAlign" - full_name: "RoIAlign" - description: "**Region of Interest Align**, or **RoIAlign**, is an operation for extracting a small feature map from each RoI in detection and segmentation based tasks. It removes the harsh quantization of [RoI Pool](https://paperswithcode.com/method/roi-pooling), properly *aligning* the extracted features with the input. To avoid any quantization of the RoI boundaries or bins (using $x/16$ instead of $[x/16]$), RoIAlign uses bilinear interpolation to compute the exact values of the input features at four regularly sampled locations in each RoI bin, and the result is then aggregated (using max or average)." + name: "Spatial Pyramid Pooling" + full_name: "Spatial Pyramid Pooling" + description: "** Spatial Pyramid Pooling (SPP)** is a pooling layer that removes the fixed-size constraint of the network, i.e. a CNN does not require a fixed-size input image. Specifically, we add an SPP layer on top of the last convolutional layer. The SPP layer pools the features and generates fixed-length outputs, which are then fed into the fully-connected layers (or other classifiers). In other words, we perform some information aggregation at a deeper stage of the network hierarchy (between convolutional layers and fully-connected layers) to avoid the need for cropping or warping at the beginning." } methods: { - name: "Depthwise Separable Convolution" - full_name: "Depthwise Separable Convolution" - description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. 
The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } } papers: { @@ -39741,7 +40134,7 @@ pr_id_to_video: { url: "https://github.com/Alan-D-Chen/CDIoU-CDIoUloss" owner: "Alan-D-Chen" framework: FRAMEWORK_PYTORCH - number_of_stars: 18 + number_of_stars: 17 description: "🔥CDIoU and CDIoU loss is like a convenient plug-in that can be used in multiple models. CDIoU and CDIoU loss have different excellent performances in several models such as Faster R-CNN, YOLOv4, RetinaNet and . There is a maximum AP improvement of 1.9% and an average AP of 0.8% improvement on MS COCO dataset, compared to traditional evaluation-feedback modules. Here we just use as an example to illustrate the code." } methods: { @@ -39813,7 +40206,7 @@ pr_id_to_video: { url: "https://github.com/Megvii-BaseDetection/OTA" owner: "Megvii-BaseDetection" framework: FRAMEWORK_PYTORCH - number_of_stars: 107 + number_of_stars: 132 description: "Official implementation of our CVPR2021 paper \"OTA: Optimal Transport Assignment for Object Detection\" in Pytorch." } } @@ -39836,7 +40229,7 @@ pr_id_to_video: { url: "https://github.com/Duankaiwen/CPNDet" owner: "Duankaiwen" framework: FRAMEWORK_PYTORCH - number_of_stars: 164 + number_of_stars: 169 description: "Corner Proposal Network for Anchor-free, Two-stage Object Detection" } methods: { @@ -39848,8 +40241,8 @@ pr_id_to_video: { video: { video_id: "SxdNVSDPIOo" video_title: "PR-291: Bridging the Gap Between Anchor-based and Anchor-free Detection via ATSS" - number_of_likes: 4 - number_of_views: 314 + number_of_likes: 5 + number_of_views: 338 published_date: { seconds: 1607267659 } @@ -39877,26 +40270,33 @@ pr_id_to_video: { authors: "James A. 
Yorke" authors: "Cornelia Fermüller" authors: "Yiannis Aloimonos" - repositories: { - url: "https://github.com/purbayankar/FeatureFusionUNet" - owner: "purbayankar" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "Various UNet models for satellite images change detection using OSCD dataset" - } repositories: { is_official: true url: "https://github.com/deconvolutionpaper/deconvolution" owner: "deconvolutionpaper" framework: FRAMEWORK_PYTORCH - number_of_stars: 157 + number_of_stars: 161 } repositories: { is_official: true url: "https://github.com/yechengxi/deconvolution" owner: "yechengxi" framework: FRAMEWORK_PYTORCH - number_of_stars: 157 + number_of_stars: 161 + } + repositories: { + url: "https://github.com/purbayankar/Advanced_GAIN" + owner: "purbayankar" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "Generative Adversarial Network for missing data imputation" + } + repositories: { + url: "https://github.com/purbayankar/FeatureFusionUNet" + owner: "purbayankar" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "Various UNet models for satellite images change detection using OSCD dataset" } methods: { name: "Convolution" @@ -40044,17 +40444,17 @@ pr_id_to_video: { authors: "Michael Liebling" repositories: { is_official: true - url: "https://github.com/ashajkofci/semiblindpsfdeconv" - owner: "ashajkofci" + url: "https://github.com/idiap/semiblindpsfdeconv" + owner: "idiap" framework: FRAMEWORK_PYTORCH + number_of_stars: 14 description: "Code for \"Semi-Blind Spatially-Variant Deconvolution in Optical Microscopy with Local Point Spread Function Estimation By Use Of Convolutional Neural Networks\" ICIP 2018" } repositories: { is_official: true - url: "https://github.com/idiap/semiblindpsfdeconv" - owner: "idiap" + url: "https://github.com/ashajkofci/semiblindpsfdeconv" + owner: "ashajkofci" framework: FRAMEWORK_PYTORCH - number_of_stars: 14 description: "Code for \"Semi-Blind Spatially-Variant Deconvolution in Optical Microscopy with Local Point Spread Function Estimation By Use Of Convolutional Neural Networks\" ICIP 2018" } } @@ -40075,8 +40475,8 @@ pr_id_to_video: { video: { video_id: "CInxKZz96ec" video_title: "PR-292: Network Deconvolution" - number_of_likes: 13 - number_of_views: 641 + number_of_likes: 16 + number_of_views: 667 published_date: { seconds: 1607335532 } @@ -40100,6 +40500,13 @@ pr_id_to_video: { authors: "Yujun Shen" authors: "Deli Zhao" authors: "Bolei Zhou" + repositories: { + url: "https://github.com/genforce/idinvert" + owner: "genforce" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 353 + description: "[ECCV 2020] In-Domain GAN Inversion for Real Image Editing" + } repositories: { url: "https://github.com/phillips96/CRA-Inpainting" owner: "phillips96" @@ -40107,13 +40514,6 @@ pr_id_to_video: { number_of_stars: 8 description: "Tensorflow2 Re-Implementation of \"Contextual Residual Aggregation for Ultra High-Resolution Image Inpainting\" - CVPR 2020 Oral" } - repositories: { - url: "https://github.com/genforce/idinvert" - owner: "genforce" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 341 - description: "[ECCV 2020] In-Domain GAN Inversion for Real Image Editing" - } methods: { name: "GAN" full_name: "Generative Adversarial Network" @@ -40144,15 +40544,9 @@ pr_id_to_video: { url: "https://github.com/weihaox/awesome-gan-inversion" owner: "weihaox" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 223 + number_of_stars: 276 description: "A collection of resources on GAN inversion." 
} - repositories: { - url: "https://github.com/weihaox/documents" - owner: "weihaox" - framework: FRAMEWORK_OTHERS - number_of_stars: 3 - } methods: { name: "GAN Hinge Loss" full_name: "GAN Hinge Loss" @@ -40221,7 +40615,7 @@ pr_id_to_video: { url: "https://github.com/danielroich/PTI" owner: "danielroich" framework: FRAMEWORK_PYTORCH - number_of_stars: 185 + number_of_stars: 214 description: "Official Implementation for \"Pivotal Tuning for Latent-based editing of Real Images\" https://arxiv.org/abs/2106.05744" } methods: { @@ -40396,7 +40790,7 @@ pr_id_to_video: { url: "https://github.com/AnonSubm2021/TransStyleGAN" owner: "AnonSubm2021" framework: FRAMEWORK_PYTORCH - number_of_stars: 17 + number_of_stars: 21 } methods: { name: "Adaptive Instance Normalization" @@ -40482,7 +40876,7 @@ pr_id_to_video: { video_id: "TVSJO9uNq7g" video_title: "PR-293: In-Domain GAN Inversion for Real Image Editing" number_of_likes: 8 - number_of_views: 292 + number_of_views: 347 published_date: { seconds: 1611494825 } @@ -40520,33 +40914,36 @@ pr_id_to_video: { authors: "Furu Wei" authors: "Ming Zhou" repositories: { - is_official: true - url: "https://github.com/microsoft/unilm" - owner: "microsoft" + url: "https://github.com/cydal/LayoutML_pytorch" + owner: "cydal" + framework: FRAMEWORK_OTHERS + description: "Text and Layout Document Image Understanding. LayoutLM" + } + repositories: { + url: "https://github.com/microsoft/unilm/tree/master/layoutlm" + owner: "master" framework: FRAMEWORK_PYTORCH - number_of_stars: 2311 + number_of_stars: 2565 description: "UniLM AI - Unified \"Language\" Model Pre-training across Tasks, Languages, and Modalities" } repositories: { - url: "https://github.com/huggingface/transformers" - owner: "huggingface" + url: "https://github.com/BordiaS/layoutlm" + owner: "BordiaS" framework: FRAMEWORK_PYTORCH - number_of_stars: 48493 - description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + number_of_stars: 33 } repositories: { - url: "https://github.com/lulia0228/Document_IE" - owner: "lulia0228" + url: "https://github.com/doc-analysis/DocBank" + owner: "doc-analysis" framework: FRAMEWORK_OTHERS - number_of_stars: 7 - description: "GCN use for semi-construct document information extraction." + number_of_stars: 222 + description: "DocBank: A Benchmark Dataset for Document Layout Analysis" } repositories: { - url: "https://github.com/omarsou/layoutlm_CORD" - owner: "omarsou" - framework: FRAMEWORK_OTHERS - number_of_stars: 15 - description: "Evaluation of the Layoutlm model on the CORD dataset" + url: "https://github.com/kenAlparslan/Texttract" + owner: "kenAlparslan" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 } repositories: { url: "https://github.com/nunenuh/layoutlm.pytorch" @@ -40555,31 +40952,34 @@ pr_id_to_video: { number_of_stars: 1 } repositories: { - url: "https://github.com/BordiaS/layoutlm" - owner: "BordiaS" + url: "https://github.com/omarsou/layoutlm_CORD" + owner: "omarsou" + framework: FRAMEWORK_OTHERS + number_of_stars: 15 + description: "Evaluation of the Layoutlm model on the CORD dataset" + } + repositories: { + url: "https://github.com/huggingface/transformers" + owner: "huggingface" framework: FRAMEWORK_PYTORCH - number_of_stars: 31 + number_of_stars: 49984 + description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." 
} repositories: { - url: "https://github.com/kenAlparslan/Texttract" - owner: "kenAlparslan" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 + url: "https://github.com/lulia0228/Document_IE" + owner: "lulia0228" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "GCN use for semi-construct document information extraction." } repositories: { - url: "https://github.com/microsoft/unilm/tree/master/layoutlm" - owner: "master" + is_official: true + url: "https://github.com/microsoft/unilm" + owner: "microsoft" framework: FRAMEWORK_PYTORCH - number_of_stars: 2310 + number_of_stars: 2565 description: "UniLM AI - Unified \"Language\" Model Pre-training across Tasks, Languages, and Modalities" } - repositories: { - url: "https://github.com/doc-analysis/DocBank" - owner: "doc-analysis" - framework: FRAMEWORK_OTHERS - number_of_stars: 214 - description: "DocBank: A Benchmark Dataset for Document Layout Analysis" - } } papers: { paper_id: "190412577" @@ -40625,7 +41025,7 @@ pr_id_to_video: { url: "https://github.com/malllabiisc/NeuralDater" owner: "malllabiisc" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 58 + number_of_stars: 59 description: "ACL 2018: Dating Documents using Graph Convolution Networks" } } @@ -40641,20 +41041,20 @@ pr_id_to_video: { authors: "Endi Niu" authors: "Zhuo Wu" authors: "Xiaoguang Wang" - repositories: { - url: "https://github.com/vincentAGNES/OCR-Extract-total-amount-TTC-of-receipts" - owner: "vincentAGNES" - framework: FRAMEWORK_OTHERS - number_of_stars: 7 - } repositories: { is_official: true url: "https://github.com/vsymbol/CUTIE" owner: "vsymbol" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 108 + number_of_stars: 111 description: "CUTIE (TensorFlow implementation of Convolutional Universal Text Information Extractor)" } + repositories: { + url: "https://github.com/vincentAGNES/OCR-Extract-total-amount-TTC-of-receipts" + owner: "vincentAGNES" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + } } papers: { paper_id: "a-survey-of-deep-learning-approaches-for-ocr" @@ -40703,7 +41103,7 @@ pr_id_to_video: { url: "https://github.com/xiaoqian19940510/text-classification-" owner: "xiaoqian19940510" framework: FRAMEWORK_PYTORCH - number_of_stars: 282 + number_of_stars: 298 description: "文本分类资源汇总,包括深度学习文本分类模型,如SpanBERT、ALBERT、RoBerta、Xlnet、MT-DNN、BERT、TextGCN、MGAN、TextCapsule、SGNN、SGM、LEAM、ULMFiT、DGCNN、ELMo、RAM、DeepMoji、IAN、DPCNN、TopicRNN、LSTMN 、Multi-Task、HAN、CharCNN、Tree-LSTM、DAN、TextRCNN、Paragraph-Vec、TextCNN、DCNN、RNTN、MV-RNN、RAE等,浅层学习模型,如LightGBM 、SVM、XGboost、Random Forest、C4.5、CART、KNN、NB、HMM等。介绍文本分类数据集,如MR、SST、MPQA、IMDB、Yelp、20NG、AG、R8、DBpedia、Ohsumed、SQuAD、SNLI、MNLI、MSRP、MRDA、RCV1、AAPD,评价指标,如accuracy、Precision、Recall、F1、EM、MRR、HL、Micro-F1、Macro-F1、P@K,和技术挑战,包括多标签文本分类。" } } @@ -40724,7 +41124,7 @@ pr_id_to_video: { url: "https://github.com/Psarpei/Multi-Type-TD-TSR" owner: "Psarpei" framework: FRAMEWORK_PYTORCH - number_of_stars: 47 + number_of_stars: 55 description: "Extracting Tables from Document Images using a Multi-stage Pipeline for Table Detection and Table Structure Recognition:" } } @@ -40750,7 +41150,7 @@ pr_id_to_video: { video: { video_id: "D5NcGO3QV6s" video_title: "PR-294: Document AI - Structured Documents Understanding using Deep Learning" - number_of_views: 805 + number_of_views: 885 published_date: { seconds: 1608474978 } @@ -40776,20 +41176,6 @@ pr_id_to_video: { authors: "Sanjay E. Sarma" authors: "Michael M. Bronstein" authors: "Justin M. 
Solomon" - repositories: { - url: "https://github.com/princeton-vl/SimpleView" - owner: "princeton-vl" - framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "Official Code for ICML 2021 paper \"Revisiting Point Cloud Shape Classification with a Simple and Effective Baseline\"" - } - repositories: { - is_official: true - url: "https://github.com/WangYueFt/dgcnn" - owner: "WangYueFt" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 962 - } repositories: { url: "https://github.com/lingzhang1/dgcnn_v2" owner: "lingzhang1" @@ -40824,7 +41210,7 @@ pr_id_to_video: { url: "https://github.com/hqucms/ParticleNet" owner: "hqucms" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 16 + number_of_stars: 17 description: "Implementation of the jet classification network in ParticleNet: Jet Tagging via Particle Clouds" } repositories: { @@ -40836,9 +41222,23 @@ pr_id_to_video: { url: "https://github.com/AnTao97/dgcnn.pytorch" owner: "AnTao97" framework: FRAMEWORK_PYTORCH - number_of_stars: 212 + number_of_stars: 224 description: "A PyTorch implementation of Dynamic Graph CNN for Learning on Point Clouds (DGCNN)" } + repositories: { + url: "https://github.com/vinits5/learning3d" + owner: "vinits5" + framework: FRAMEWORK_PYTORCH + number_of_stars: 234 + description: "This is a complete package of recent deep learning methods for 3D point clouds in pytorch (with pretrained models)." + } + repositories: { + url: "https://github.com/hansen7/NRS_3D" + owner: "hansen7" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "Neural Random Subspace (3D Part)" + } } papers: { paper_id: "linked-dynamic-graph-cnn-learning-on-point" @@ -40895,24 +41295,24 @@ pr_id_to_video: { authors: "Li Liu" authors: "Mohammed Bennamoun" repositories: { - url: "https://github.com/vijaylaxmid/DeepLearning_3DPointClouds" - owner: "vijaylaxmid" - framework: FRAMEWORK_OTHERS + url: "https://github.com/TiagoCortinhal/SalsaNext" + owner: "TiagoCortinhal" + framework: FRAMEWORK_PYTORCH + number_of_stars: 190 + description: "Uncertainty-aware Semantic Segmentation of LiDAR Point Clouds for Autonomous Driving" } repositories: { is_official: true url: "https://github.com/QingyongHu/SoTA-Point-Cloud" owner: "QingyongHu" framework: FRAMEWORK_OTHERS - number_of_stars: 964 + number_of_stars: 987 description: "🔥Deep Learning for 3D Point Clouds (IEEE TPAMI, 2020)" } repositories: { - url: "https://github.com/TiagoCortinhal/SalsaNext" - owner: "TiagoCortinhal" - framework: FRAMEWORK_PYTORCH - number_of_stars: 184 - description: "Uncertainty-aware Semantic Segmentation of LiDAR Point Clouds for Autonomous Driving" + url: "https://github.com/vijaylaxmid/DeepLearning_3DPointClouds" + owner: "vijaylaxmid" + framework: FRAMEWORK_OTHERS } } papers: { @@ -40960,7 +41360,7 @@ pr_id_to_video: { url: "https://github.com/hht1996ok/VA-GCN" owner: "hht1996ok" framework: FRAMEWORK_PYTORCH - number_of_stars: 58 + number_of_stars: 59 description: "A new GCN model for Point Cloud Analyse" } methods: { @@ -40972,8 +41372,8 @@ pr_id_to_video: { video: { video_id: "CyFANS_Itb8" video_title: "PR-295: Dynamic Graph CNN for Learning on Point Clouds" - number_of_likes: 8 - number_of_views: 895 + number_of_likes: 9 + number_of_views: 948 published_date: { seconds: 1608475531 } @@ -41019,7 +41419,7 @@ pr_id_to_video: { url: "https://github.com/xidongbo/AITM" owner: "xidongbo" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 + number_of_stars: 16 description: "TensorFlow implementation of Adaptive Information Transfer Multi-task (AITM) 
framework. Code for the paper submitted to KDD21: Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning for Customer Acquisition." } } @@ -41058,7 +41458,7 @@ pr_id_to_video: { url: "https://github.com/google-research/google-research/tree/master/dselect_k_moe" owner: "master" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18403 + number_of_stars: 18790 description: "Google Research" } } @@ -41092,7 +41492,7 @@ pr_id_to_video: { url: "https://github.com/Hzzone/MTLFace" owner: "Hzzone" framework: FRAMEWORK_PYTORCH - number_of_stars: 56 + number_of_stars: 65 description: "When Age-Invariant Face Recognition Meets Face Age Synthesis: A Multi-Task Learning Framework (CVPR 2021 oral)" } } @@ -41170,8 +41570,8 @@ pr_id_to_video: { video: { video_id: "2hd0rQhht1Q" video_title: "PR-296: Progressive Layered Extraction (PLE): A Novel MTL Model for Personalized Recommendations" - number_of_likes: 8 - number_of_views: 739 + number_of_likes: 9 + number_of_views: 783 published_date: { seconds: 1610293260 } @@ -41198,35 +41598,24 @@ pr_id_to_video: { authors: "Alexandre Sablayrolles" authors: "Hervé Jégou" repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } - repositories: { - url: "https://github.com/bshantam97/Attention_Based_Networks" - owner: "bshantam97" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/tianhai123/vit-pytorch" - owner: "tianhai123" + url: "https://github.com/huggingface/transformers" + owner: "huggingface" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 49984 + description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." } repositories: { - url: "https://github.com/lucidrains/vit-pytorch" - owner: "lucidrains" + url: "https://github.com/UdbhavPrasad072300/Transformer-Implementations" + owner: "UdbhavPrasad072300" framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 - description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + number_of_stars: 18 + description: "Library - Vanilla, ViT, DeiT, BERT, GPT" } repositories: { url: "https://github.com/TACJu/TransFG" owner: "TACJu" framework: FRAMEWORK_PYTORCH - number_of_stars: 117 + number_of_stars: 125 description: "This is the official PyTorch implementation of the paper \"TransFG: A Transformer Architecture for Fine-grained Recognition\" (Ju He, Jie-Neng Chen, Shuai Liu, Adam Kortylewski, Cheng Yang, Yutong Bai, Changhu Wang, Alan Yuille)." 
} repositories: { @@ -41234,15 +41623,40 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/deit" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1967 + number_of_stars: 2047 description: "Official DeiT repository" } repositories: { - url: "https://github.com/UdbhavPrasad072300/Transformer-Implementations" - owner: "UdbhavPrasad072300" + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 17 - description: "Library - Vanilla, ViT, DeiT, BERT, GPT" + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleClas" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2166 + description: "A treasure chest for visual recognition powered by PaddlePaddle" + } + repositories: { + url: "https://github.com/tianhai123/vit-pytorch" + owner: "tianhai123" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + url: "https://github.com/bshantam97/Attention_Based_Networks" + owner: "bshantam97" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/cogtoolslab/physics-benchmarking-neurips2021" + owner: "cogtoolslab" + framework: FRAMEWORK_OTHERS + number_of_stars: 9 + description: "Repo for \"Physion: Evaluating Physical Prediction from Vision in Humans and Machines\" submission to NeurIPS 2021 (Datasets & Benchmarks track)" } methods: { name: "Attention Dropout" @@ -41321,53 +41735,67 @@ pr_id_to_video: { description: "Minimal Flax implementation of \"ResMLP: Feedforward networks for image classification with data-efficient training\" (https://arxiv.org/abs/2105.03404)" } repositories: { - url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" - owner: "xmu-xiaoma666" - framework: FRAMEWORK_PYTORCH - number_of_stars: 840 - description: "🍀 Pytorch implementation of various Attention Mechanisms, MLP, Re-parameter, Convolution, which is helpful to further understand papers.⭐⭐⭐" - } - repositories: { - url: "https://github.com/facebookresearch/deit" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1967 - description: "Official DeiT repository" + url: "https://github.com/leondgarse/Keras_mlp" + owner: "leondgarse" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 4 + description: "Keras implementation of mlp-mixer, ResMLP. imagenet/imagenet21k weights reloaded." 
} repositories: { is_official: true url: "https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } repositories: { - url: "https://github.com/jaketae/res-mlp" - owner: "jaketae" + url: "https://github.com/rishikksh20/ResMLP-pytorch" + owner: "rishikksh20" framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of ResMLP: Feedforward networks for image classification with data-efficient training" + number_of_stars: 7 + description: "ResMLP: Feedforward networks for image classification with data-efficient training" + } + repositories: { + url: "https://github.com/lucidrains/res-mlp-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 150 + description: "Implementation of ResMLP, an all MLP solution to image classification, in Pytorch" } repositories: { url: "https://github.com/leaderj1001/Bag-of-MLP" owner: "leaderj1001" framework: FRAMEWORK_PYTORCH - number_of_stars: 15 + number_of_stars: 16 description: "Bag of MLP" } repositories: { - url: "https://github.com/lucidrains/res-mlp-pytorch" - owner: "lucidrains" + url: "https://github.com/jaketae/res-mlp" + owner: "jaketae" framework: FRAMEWORK_PYTORCH - number_of_stars: 148 - description: "Implementation of ResMLP, an all MLP solution to image classification, in Pytorch" + description: "PyTorch implementation of ResMLP: Feedforward networks for image classification with data-efficient training" } repositories: { - url: "https://github.com/rishikksh20/ResMLP-pytorch" - owner: "rishikksh20" + url: "https://github.com/facebookresearch/deit" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "ResMLP: Feedforward networks for image classification with data-efficient training" + number_of_stars: 2047 + description: "Official DeiT repository" + } + repositories: { + url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" + owner: "xmu-xiaoma666" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1492 + description: "🍀 Pytorch implementation of various Attention Mechanisms, MLP, Re-parameter, Convolution, which is helpful to further understand papers.⭐⭐⭐" + } + repositories: { + url: "https://github.com/Mayurji/Image-Classification-PyTorch" + owner: "Mayurji" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "Learning and Building Convolutional Neural Networks using PyTorch" } methods: { name: "LayerScale" @@ -41393,33 +41821,33 @@ pr_id_to_video: { authors: "Gabriel Synnaeve" authors: "Hervé Jégou" repositories: { - url: "https://github.com/lucidrains/vit-pytorch" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 - description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" - } - repositories: { - url: "https://github.com/jaketae/res-mlp" - owner: "jaketae" + is_official: true + url: "https://github.com/facebookresearch/deit" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - description: "PyTorch implementation of ResMLP: Feedforward networks for image classification with data-efficient training" + number_of_stars: 2047 + description: "Official DeiT repository" } repositories: { is_official: true url: 
"https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } repositories: { - is_official: true - url: "https://github.com/facebookresearch/deit" - owner: "facebookresearch" + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 1967 - description: "Official DeiT repository" + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } + repositories: { + url: "https://github.com/jaketae/res-mlp" + owner: "jaketae" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementation of ResMLP: Feedforward networks for image classification with data-efficient training" } methods: { name: "Feedforward Network" @@ -41476,35 +41904,35 @@ pr_id_to_video: { authors: "Hervé Jégou" authors: "Matthijs Douze" repositories: { - is_official: true - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + url: "https://github.com/PaddlePaddle/PaddleClas" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2166 + description: "A treasure chest for visual recognition powered by PaddlePaddle" } repositories: { url: "https://github.com/lucidrains/vit-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 + number_of_stars: 5337 description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } repositories: { is_official: true url: "https://github.com/facebookresearch/LeViT" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 282 + number_of_stars: 311 description: "LeViT a Vision Transformer in ConvNet's Clothing for Faster Inference" } + repositories: { + is_official: true + url: "https://github.com/rwightman/pytorch-image-models" + owner: "rwightman" + framework: FRAMEWORK_PYTORCH + number_of_stars: 12196 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + } methods: { name: "Average Pooling" full_name: "Average Pooling" @@ -41560,7 +41988,7 @@ pr_id_to_video: { paper_id: "visual-transformer-pruning" title: "Visual Transformer Pruning" arxiv_id: "2104.08500" - abstract: "Visual transformer has achieved competitive performance on a variety of computer vision applications. However, their storage, run-time memory, and computational demands are hindering the deployment on mobile devices. 
Here we present an visual transformer pruning approach, which identifies the impacts of channels in each layer and then executes pruning accordingly. By encouraging channel-wise sparsity in the Transformer, important channels automatically emerge. A great number of channels with small coefficients can be discarded to achieve a high pruning ratio without significantly compromising accuracy. The pipeline for visual transformer pruning is as follows: 1) training with sparsity regularization; 2) pruning channels; 3) finetuning. The reduced parameters and FLOPs ratios of the proposed algorithm are well evaluated and analyzed on ImageNet dataset to demonstrate its effectiveness." + abstract: "Vision transformer has achieved competitive performance on a variety of computer vision applications. However, their storage, run-time memory, and computational demands are hindering the deployment to mobile devices. Here we present a vision transformer pruning approach, which identifies the impacts of dimensions in each layer of transformer and then executes pruning accordingly. By encouraging dimension-wise sparsity in the transformer, important dimensions automatically emerge. A great number of dimensions with small importance scores can be discarded to achieve a high pruning ratio without significantly compromising accuracy. The pipeline for vision transformer pruning is as follows: 1) training with sparsity regularization; 2) pruning dimensions of linear projections; 3) fine-tuning. The reduced parameters and FLOPs ratios of the proposed algorithm are well evaluated and analyzed on ImageNet dataset to demonstrate the effectiveness of our proposed method." published_date: { seconds: 1618617600 } @@ -41603,6 +42031,11 @@ pr_id_to_video: { full_name: "Softmax" description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } + methods: { + name: "Vision Transformer" + full_name: "Vision Transformer" + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." + } methods: { name: "Adam" full_name: "Adam" @@ -41613,11 +42046,6 @@ pr_id_to_video: { full_name: "Multi-Head Attention" description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } } papers: { paper_id: "transformer-in-transformer" @@ -41638,31 +42066,31 @@ pr_id_to_video: { url: "https://github.com/huawei-noah/CV-backbones" owner: "huawei-noah" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1526 + number_of_stars: 1559 description: "CV backbones including GhostNet, TinyNet and TNT." } - repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } repositories: { is_official: true url: "https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch" owner: "master" framework: FRAMEWORK_PYTORCH - number_of_stars: 1526 + number_of_stars: 1559 description: "CV backbones including GhostNet, TinyNet and TNT." 
} repositories: { url: "https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/tnt.py" owner: "models" framework: FRAMEWORK_PYTORCH - number_of_stars: 11580 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } + repositories: { + url: "https://github.com/lucidrains/transformer-in-transformer" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 240 + description: "Implementation of Transformer in Transformer, pixel level attention paired with patch level attention for image classification, in Pytorch" + } repositories: { url: "https://github.com/NZ99/transformer_in_transformer_flax" owner: "NZ99" @@ -41670,11 +42098,11 @@ pr_id_to_video: { number_of_stars: 20 } repositories: { - url: "https://github.com/lucidrains/transformer-in-transformer" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 234 - description: "Implementation of Transformer in Transformer, pixel level attention paired with patch level attention for image classification, in Pytorch" + url: "https://github.com/PaddlePaddle/PaddleClas" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2166 + description: "A treasure chest for visual recognition powered by PaddlePaddle" } methods: { name: "Attention Dropout" @@ -41744,29 +42172,34 @@ pr_id_to_video: { url: "https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } + repositories: { + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } repositories: { is_official: true url: "https://github.com/google-research/nested-transformer" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 49 + number_of_stars: 59 description: "Aggregating Nested Transformer https://arxiv.org/pdf/2105.12723.pdf" } - repositories: { - url: "https://github.com/lucidrains/vit-pytorch" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 - description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" - } methods: { name: "Scaled Dot-Product Attention" full_name: "Scaled Dot-Product Attention" description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." 
} + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } methods: { name: "Softmax" full_name: "Softmax" @@ -41775,7 +42208,12 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." + } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." 
} methods: { name: "Multi-Head Attention" @@ -41802,48 +42240,18 @@ pr_id_to_video: { url: "https://github.com/lukemelas/do-you-even-need-attention" owner: "lukemelas" framework: FRAMEWORK_PYTORCH - number_of_stars: 427 + number_of_stars: 430 description: "Exploring whether attention is necessary for vision transformers" } - methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" - } - methods: { - name: "Feedforward Network" - full_name: "Feedforward Network" - description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "DeiT" - full_name: "Data-efficient Image Transformer" - description: "A **Data-Efficient Image Transformer** is a type of Vision Transformer for image classification tasks. The model is trained using a teacher-student strategy specific to transformers. It relies on a distillation token ensuring that the student learns from the teacher through attention." - } methods: { name: "Softmax" full_name: "Softmax" description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { - name: "Vision Transformer" - full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." - } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" } methods: { name: "Dropout" @@ -41854,8 +42262,8 @@ pr_id_to_video: { video: { video_id: "DjEvzeiWBTo" video_title: "PR-297: Training Data-efficient Image Transformers & Distillation through Attention (DeiT)" - number_of_likes: 58 - number_of_views: 3025 + number_of_likes: 67 + number_of_views: 3363 published_date: { seconds: 1610322232 } @@ -41885,65 +42293,65 @@ pr_id_to_video: { url: "https://github.com/canjiali/PARADE" owner: "canjiali" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 60 + number_of_stars: 62 description: "code and data to faciliate BERT/ELECTRA for document ranking. Details refer to the paper - PARADE: Passage Representation Aggregation for Document Reranking. " } methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. 
For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." } methods: { - name: "GELU" - full_name: "Gaussian Error Linear Units" - description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { - name: "T5" - full_name: "T5" - description: "**T5**, or **Text-to-Text Transfer Transformer**, is a Transformer based architecture that uses a text-to-text approach. Every task – including translation, question answering, and classification – is cast as feeding the model text as input and training it to generate some target text. This allows for the use of the same model, loss function, hyperparameters, etc. across our diverse set of tasks. The changes compared to [BERT](https://paperswithcode.com/method/bert) include:\r\n\r\n- adding a *causal* decoder to the bidirectional architecture.\r\n- replacing the fill-in-the-blank cloze task with a mix of alternative pre-training tasks." + name: "Adafactor" + full_name: "Adafactor" + description: "**Adafactor** is a stochastic optimization method based on [Adam](https://paperswithcode.com/method/adam) that reduces memory usage while retaining the empirical benefits of adaptivity. This is achieved through maintaining a factored representation of the squared gradient accumulator across training steps. 
Specifically, by tracking moving averages of the row and column sums of the squared gradients for matrix-valued variables, we are able to reconstruct a low-rank approximation of the exponentially smoothed accumulator at each training step that is optimal with respect to the generalized Kullback-Leibler divergence. For an $n \\times m$ matrix, this reduces the memory requirements from $O(n m)$ to $O(n + m)$. \r\n\r\nInstead of defining the optimization algorithm in terms of absolute step sizes {$\\alpha_t$}$\\_{t=1}^T$, the authors define the optimization algorithm in terms of relative step sizes {$\\rho_t$}$\\_{t=1}^T$, which get multiplied by the scale of the parameters. The scale of a parameter vector or matrix is defined as the root-mean-square of its components, lower-bounded by a small constant $\\epsilon_2$. The reason for this lower bound is to allow zero-initialized parameters to escape 0. \r\n\r\nProposed hyperparameters are: $\\epsilon\\_{1} = 10^{-30}$, $\\epsilon\\_{2} = 10^{-3}$, $d=1$, $p\\_{t} = \\min\\left(10^{-2}, \\frac{1}{\\sqrt{t}}\\right)$, $\\hat{\\beta}\\_{2\\_{t}} = 1 - t^{-0.8}$." } methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" } methods: { - name: "SentencePiece" - full_name: "SentencePiece" - description: "**SentencePiece** is a subword tokenizer and detokenizer for natural language processing. It performs subword segmentation, supporting the byte-pair-encoding (BPE) algorithm and unigram language model, and then converts this text into an id sequence guarantee perfect reproducibility of the normalization and subword segmentation." + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." } methods: { - name: "Inverse Square Root Schedule" - full_name: "Inverse Square Root Schedule" - description: "**Inverse Square Root** is a learning rate schedule 1 / $\\sqrt{\\max\\left(n, k\\right)}$ where\r\n$n$ is the current training iteration and $k$ is the number of warm-up steps. This sets a constant learning rate for the first $k$ steps, then exponentially decays the learning rate until pre-training is over." + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } methods: { - name: "GLU" - full_name: "Gated Linear Unit" - description: "A **Gated Linear Unit**, or **GLU** computes:\r\n\r\n$$ \\text{GLU}\\left(a, b\\right) = a\\otimes \\sigma\\left(b\\right) $$\r\n\r\nIt is used in natural language processing architectures, for example the [Gated CNN](https://paperswithcode.com/method/gated-convolution-network), because here $b$ is the gate that control what information from $a$ is passed up to the following layer. Intuitively, for a language modeling task, the gating mechanism allows selection of words or features that are important for predicting the next word. The GLU also has non-linear capabilities, but has a linear path for the gradient so diminishes the vanishing gradient problem." + name: "BPE" + full_name: "Byte Pair Encoding" + description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "RUN" + full_name: "Runge Kutta optimization" + description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers. Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely well-known in mathematics. The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization" } methods: { - name: "WordPiece" - full_name: "WordPiece" - description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. 
Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" + name: "Attention Dropout" + full_name: "Attention Dropout" + description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ is the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." } } papers: { paper_id: "pretrained-transformers-for-text-ranking-bert" title: "Pretrained Transformers for Text Ranking: BERT and Beyond" arxiv_id: "2010.06467" - abstract: "The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. 
In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading." + abstract: "The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has been responsible for a paradigm shift in natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage architectures and dense retrieval techniques that perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond typical sentence-by-sentence processing in NLP, and techniques for addressing the tradeoff between effectiveness (i.e., result quality) and efficiency (e.g., query latency, model and index size). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading." published_date: { seconds: 1602547200 } @@ -41954,7 +42362,7 @@ pr_id_to_video: { url: "https://github.com/UKPLab/beir" owner: "UKPLab" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 178 + number_of_stars: 218 description: "A Heterogeneous Benchmark for Information Retrieval. Easy to use, evaluate your models across 15+ diverse IR datasets." 
} methods: { @@ -42090,6 +42498,31 @@ pr_id_to_video: { framework: FRAMEWORK_OTHERS number_of_stars: 4 } + methods: { + name: "Weight Decay" + full_name: "Weight Decay" + description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function comprising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through the objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + } + methods: { + name: "BPE" + full_name: "Byte Pair Encoding" + description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks."
+ methods: {
+ name: "RUN"
+ full_name: "Runge Kutta optimization"
+ description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers. Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely known in mathematics. The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization."
+ }
 methods: {
 name: "Attention Dropout"
 full_name: "Attention Dropout"
@@ -42115,31 +42548,6 @@ pr_id_to_video: {
 full_name: "Transformer"
 description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)."
 }
- methods: {
- name: "Multi-Head Attention"
- full_name: "Multi-Head Attention"
- description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } - methods: { - name: "WordPiece" - full_name: "WordPiece" - description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" - } - methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." - } - methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. 
More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1."
- }
- methods: {
- name: "Linear Warmup With Linear Decay"
- full_name: "Linear Warmup With Linear Decay"
- description: "**Linear Warmup With Linear Decay** is a learning rate schedule in which we increase the learning rate linearly for $n$ updates and then linearly decay afterwards."
- }
 }
 papers: {
 paper_id: "ceqe-contextualized-embeddings-for-query"
@@ -42222,7 +42630,7 @@ pr_id_to_video: {
 url: "https://github.com/sebastian-hofstaetter/transformer-kernel-ranking"
 owner: "sebastian-hofstaetter"
 framework: FRAMEWORK_PYTORCH
- number_of_stars: 95
+ number_of_stars: 128
 description: "Training & evaluation library for text-based neural re-ranking and dense retrieval models built with PyTorch"
 }
 }
@@ -42237,69 +42645,69 @@ pr_id_to_video: {
 authors: "Leonid Boytsov"
 authors: "Zico Kolter"
 repositories: {
- url: "https://github.com/oaqa/knn4qa"
+ is_official: true
+ url: "https://github.com/oaqa/FlexNeuART"
 owner: "oaqa"
 framework: FRAMEWORK_OTHERS
 number_of_stars: 109
 description: "Flexible classic and NeurAl Retrieval Toolkit"
 }
 repositories: {
- is_official: true
- url: "https://github.com/oaqa/FlexNeuART"
+ url: "https://github.com/oaqa/knn4qa"
 owner: "oaqa"
 framework: FRAMEWORK_OTHERS
 number_of_stars: 109
 description: "Flexible classic and NeurAl Retrieval Toolkit"
 }
 methods: {
- name: "Scaled Dot-Product Attention"
- full_name: "Scaled Dot-Product Attention"
- description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$."
+ name: "Adam"
+ full_name: "Adam"
+ description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. 
\r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively."
 }
 methods: {
- name: "Softmax"
- full_name: "Softmax"
- description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$"
+ name: "Dropout"
+ full_name: "Dropout"
+ description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks."
 }
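The Dropout entry just above describes the original weight-scaling scheme; the sketch below (Go, illustrative only, not part of this repository) implements the equivalent inverted-dropout variant that most frameworks use, rescaling surviving units by $1/(1-p)$ at training time so that evaluation needs no change:

```go
package main

import (
	"fmt"
	"math/rand"
)

// dropout applies inverted dropout: during training each unit is
// zeroed with probability p and survivors are rescaled by 1/(1-p),
// which matches the weight-scaling scheme described above in
// expectation while leaving evaluation untouched.
func dropout(x []float64, p float64, training bool, rng *rand.Rand) []float64 {
	out := make([]float64, len(x))
	for i, xi := range x {
		switch {
		case !training:
			out[i] = xi // evaluation: pass activations through unchanged
		case rng.Float64() < p:
			out[i] = 0 // dropped unit
		default:
			out[i] = xi / (1 - p) // survivor, rescaled
		}
	}
	return out
}

func main() {
	rng := rand.New(rand.NewSource(42))
	x := []float64{1, 2, 3, 4}
	fmt.Println("train:", dropout(x, 0.5, true, rng))
	fmt.Println("eval: ", dropout(x, 0.5, false, rng))
}
```

Averaged over many training passes, each unit's expected output equals its evaluation-time output, which is the point of the rescaling.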
 methods: {
- name: "Weight Decay"
- full_name: "Weight Decay"
- description: "**Weight Decay**, or **$L_{2}$ Regularization**, is a regularization technique applied to the weights of a neural network. We minimize a loss function compromising both the primary loss function and a penalty on the $L\\_{2}$ Norm of the weights:\r\n\r\n$$L\\_{new}\\left(w\\right) = L\\_{original}\\left(w\\right) + \\lambda{w^{T}w}$$\r\n\r\nwhere $\\lambda$ is a value determining the strength of the penalty (encouraging smaller weights). \r\n\r\nWeight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining it through to objective function. Often weight decay refers to the implementation where we specify it directly in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the objective function).\r\n\r\nImage Source: Deep Learning, Goodfellow et al"
+ name: "RUN"
+ full_name: "Runge Kutta optimization"
+ description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers. Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely known in mathematics. The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization."
 }
 methods: {
- name: "Adam"
- full_name: "Adam"
- description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively."
+ name: "Attention Dropout"
+ full_name: "Attention Dropout"
+ description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$"
+ }
+ methods: {
+ name: "GELU"
+ full_name: "Gaussian Error Linear Units"
+ description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ is the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). 
Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" } methods: { name: "Multi-Head Attention" full_name: "Multi-Head Attention" description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." - } methods: { name: "WordPiece" full_name: "WordPiece" description: "**WordPiece** is a subword segmentation algorithm used in natural language processing. The vocabulary is initialized with individual characters in the language, then the most frequent combinations of symbols in the vocabulary are iteratively added to the vocabulary. 
The process is:\r\n\r\n1. Initialize the word unit inventory with all the characters in the text.\r\n2. Build a language model on the training data using the inventory from 1.\r\n3. Generate a new word unit by combining two units out of the current word inventory to increment the word unit inventory by one. Choose the new word unit out of all the possible ones that increases the likelihood on the training data the most when added to the model.\r\n4. Goto 2 until a predefined limit of word units is reached or the likelihood increase falls below a certain threshold.\r\n\r\nText: [Source](https://stackoverflow.com/questions/55382596/how-is-wordpiece-tokenization-helpful-to-effectively-deal-with-rare-words-proble/55416944#55416944)\r\n\r\nImage: WordPiece as used in BERT" } - methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" - } methods: { name: "Residual Connection" full_name: "Residual Connection" description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "GELU" - full_name: "Gaussian Error Linear Units" - description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." 
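Since the GELU entries above quote both the exact erf form and the tanh approximation, a short Go sketch (illustrative only, not from this repository) makes the comparison concrete:

```go
package main

import (
	"fmt"
	"math"
)

// gelu is the exact form x * Phi(x), writing the standard normal CDF
// Phi through the error function, as in the entry above.
func gelu(x float64) float64 {
	return x * 0.5 * (1 + math.Erf(x/math.Sqrt2))
}

// geluTanh is the tanh approximation quoted in the same entry.
func geluTanh(x float64) float64 {
	return 0.5 * x * (1 + math.Tanh(math.Sqrt(2/math.Pi)*(x+0.044715*x*x*x)))
}

func main() {
	for _, x := range []float64{-2, -0.5, 0, 0.5, 2} {
		fmt.Printf("x=%5.2f  exact=%.6f  tanh=%.6f\n", x, gelu(x), geluTanh(x))
	}
}
```

Over this range the two columns should agree to a few decimal places, which is why the approximation is sometimes used on hardware without a fast erf.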
+ name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } } papers: { @@ -42313,6 +42721,12 @@ pr_id_to_video: { authors: "Ronak Pradeep" authors: "Rodrigo Nogueira" authors: "Jimmy Lin" + repositories: { + url: "https://github.com/terrierteam/pyterrier_t5" + owner: "terrierteam" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } } papers: { paper_id: "denmark-s-participation-in-the-search-engine" @@ -42335,7 +42749,7 @@ pr_id_to_video: { video_id: "xhew1Tt62mY" video_title: "PR-298: PARADE: Passage representation aggregation for document reranking" number_of_likes: 7 - number_of_views: 329 + number_of_views: 337 published_date: { seconds: 1610900771 } @@ -42448,19 +42862,19 @@ pr_id_to_video: { authors: "Cao Xiao" authors: "Jimeng Sun" authors: "Marinka Zitnik" - repositories: { - url: "https://github.com/yzhao062/yzhao062" - owner: "yzhao062" - framework: FRAMEWORK_OTHERS - number_of_stars: 6 - } repositories: { is_official: true url: "https://github.com/mims-harvard/TDC" owner: "mims-harvard" framework: FRAMEWORK_PYTORCH - number_of_stars: 379 - description: "Therapeutics Data Commons: Machine Learning Foundation for Therapeutics" + number_of_stars: 399 + description: "Therapeutics Data Commons: Machine Learning Datasets and Tasks for Drug Discovery and Development" + } + repositories: { + url: "https://github.com/yzhao062/yzhao062" + owner: "yzhao062" + framework: FRAMEWORK_OTHERS + number_of_stars: 6 } } papers: { @@ -42531,7 +42945,7 @@ pr_id_to_video: { video_id: "bvdTif4JSPU" video_title: "PR-299: Accelerating high-throughput virtual screening through molecular pool-based active learning" number_of_likes: 11 - number_of_views: 240 + number_of_views: 257 published_date: { seconds: 1611496274 } @@ -42559,7 +42973,7 @@ pr_id_to_video: { url: "https://github.com/TengdaHan/CoCLR" owner: "TengdaHan" framework: FRAMEWORK_PYTORCH - number_of_stars: 202 + number_of_stars: 214 description: "[NeurIPS'20] Self-supervised Co-Training for Video Representation Learning. Tengda Han, Weidi Xie, Andrew Zisserman." 
} methods: { @@ -42649,17 +43063,17 @@ pr_id_to_video: { authors: "Rongrong Ji" authors: "Xing Sun" repositories: { - url: "https://github.com/FingerRec/BE" + url: "https://github.com/FingerRec/TBE" owner: "FingerRec" framework: FRAMEWORK_PYTORCH - number_of_stars: 53 + number_of_stars: 125 description: "[CVPR2021] The source code for our paper 《Removing the Background by Adding the Background: Towards Background Robust Self-supervised Video Representation Learning》." } repositories: { - url: "https://github.com/FingerRec/TBE" + url: "https://github.com/FingerRec/BE" owner: "FingerRec" framework: FRAMEWORK_PYTORCH - number_of_stars: 53 + number_of_stars: 125 description: "[CVPR2021] The source code for our paper 《Removing the Background by Adding the Background: Towards Background Robust Self-supervised Video Representation Learning》." } methods: { @@ -42724,7 +43138,7 @@ pr_id_to_video: { url: "https://github.com/laura-wang/video-pace" owner: "laura-wang" framework: FRAMEWORK_OTHERS - number_of_stars: 91 + number_of_stars: 92 description: "code for our ECCV-2020 paper: Self-supervised Video Representation Learning by Pace Prediction" } } @@ -42771,7 +43185,7 @@ pr_id_to_video: { url: "https://github.com/martinetoering/ViCC" owner: "martinetoering" framework: FRAMEWORK_PYTORCH - number_of_stars: 15 + number_of_stars: 18 description: "Code repository for \"Self-supervised Video Representation Learning with Cross-Stream Prototypical Contrasting\", https://arxiv.org/abs/2106.10137." } methods: { @@ -42823,8 +43237,8 @@ pr_id_to_video: { video: { video_id: "-9hGJzL6se0" video_title: "PR-300: Self-Supervised Co-Training for Video Representation Learning" - number_of_likes: 8 - number_of_views: 786 + number_of_likes: 11 + number_of_views: 834 published_date: { seconds: 1611498558 } @@ -42853,41 +43267,48 @@ pr_id_to_video: { authors: "Mark Chen" authors: "Ilya Sutskever" repositories: { - url: "https://github.com/JoyPang123/Textmage" - owner: "JoyPang123" + url: "https://github.com/borisdayma/dalle-mini" + owner: "borisdayma" + framework: FRAMEWORK_OTHERS + number_of_stars: 281 + description: "DALL·E Mini - Generate images from a text prompt" + } + repositories: { + is_official: true + url: "https://github.com/openai/DALL-E" + owner: "openai" framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "A website to generate images from text." + number_of_stars: 3170 + description: "PyTorch package for the discrete VAE used for DALL·E." } repositories: { - url: "https://github.com/epfml/powersgd" - owner: "epfml" + url: "https://github.com/lucidrains/DALLE-pytorch" + owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 60 - description: "Practical low-rank gradient compression for distributed optimization: https://arxiv.org/abs/1905.13727" + number_of_stars: 3268 + description: "Implementation / replication of DALL-E, OpenAI's Text to Image Transformer, in Pytorch" } repositories: { is_official: true url: "https://github.com/openai/DALL-E/blob/master/notebooks/usage.ipynb" owner: "notebooks" framework: FRAMEWORK_PYTORCH - number_of_stars: 2996 + number_of_stars: 3170 description: "PyTorch package for the discrete VAE used for DALL·E." 
} repositories: { - url: "https://github.com/lucidrains/DALLE-pytorch" - owner: "lucidrains" + url: "https://github.com/epfml/powersgd" + owner: "epfml" framework: FRAMEWORK_PYTORCH - number_of_stars: 3154 - description: "Implementation / replication of DALL-E, OpenAI's Text to Image Transformer, in Pytorch" + number_of_stars: 63 + description: "Practical low-rank gradient compression for distributed optimization: https://arxiv.org/abs/1905.13727" } repositories: { - is_official: true - url: "https://github.com/openai/DALL-E" - owner: "openai" + url: "https://github.com/JoyPang123/Textmage" + owner: "JoyPang123" framework: FRAMEWORK_PYTORCH - number_of_stars: 2996 - description: "PyTorch package for the discrete VAE used for DALL·E." + number_of_stars: 9 + description: "A website to generate images from text." } } papers: { @@ -42914,7 +43335,7 @@ pr_id_to_video: { url: "https://github.com/THUDM/CogView" owner: "THUDM" framework: FRAMEWORK_PYTORCH - number_of_stars: 402 + number_of_stars: 467 description: "Text-to-Image generation" } methods: { @@ -42983,63 +43404,67 @@ pr_id_to_video: { authors: "Bernt Schiele" authors: "Honglak Lee" repositories: { - url: "https://github.com/jay-z007/Text-to-Image-Synthesis" - owner: "jay-z007" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "PyTorch Implementation of the paper - 'Generative Adversarial Text to Image Synthesis' from ICML 2016 https://arxiv.org/abs/1605.05396" + url: "https://github.com/hanzhanggit/StackGAN" + owner: "hanzhanggit" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 1732 } repositories: { - url: "https://github.com/priscillalui/StackGAN-Stories" - owner: "priscillalui" - framework: FRAMEWORK_PYTORCH - description: "StackGAN-v2 on a custom children's book stories dataset" + url: "https://github.com/rafiahmed40/stack-adverserial-network" + owner: "rafiahmed40" + framework: FRAMEWORK_TENSORFLOW } repositories: { - url: "https://github.com/rightlit/StackGAN-v2-rev" - owner: "rightlit" + url: "https://github.com/DanielLongo/AdversarialTrain" + owner: "DanielLongo" framework: FRAMEWORK_PYTORCH number_of_stars: 1 - description: "StackGAN-v2 revised and applied demos" + description: "cGANs for data augmentation, adversarial training, and transfer learning" } repositories: { - url: "https://github.com/anandaltekar/movie-poster-generator" - owner: "anandaltekar" + url: "https://github.com/DanielLongo/cGANs" + owner: "DanielLongo" framework: FRAMEWORK_PYTORCH - description: "Generate posters from short plot descriptions." + number_of_stars: 1 + description: "cGANs for data augmentation, adversarial training, and transfer learning" } repositories: { - url: "https://github.com/KanikaNegi/text-to-image-using-GAN" - owner: "KanikaNegi" + url: "https://github.com/DanielLongo/GANs" + owner: "DanielLongo" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + description: "cGANs for data augmentation, adversarial training, and transfer learning" } repositories: { - url: "https://github.com/srayhit/TextToImageSynthesisUsingGAN" - owner: "srayhit" - framework: FRAMEWORK_TENSORFLOW - description: "This repository includes PyTorch and Tensorflow implementations of Generative Adversarial Text-to-Image Synthesis used for the final project for Artificial Neural Computation." 
+ url: "https://github.com/Maymaher/StackGANv2" + owner: "Maymaher" + framework: FRAMEWORK_PYTORCH + description: "StackGAN v2" } repositories: { - url: "https://github.com/BeyondCloud/Comp04_ReverseImageCaption" - owner: "BeyondCloud" - framework: FRAMEWORK_TENSORFLOW + url: "https://github.com/hanzhanggit/StackGAN-Pytorch" + owner: "hanzhanggit" + framework: FRAMEWORK_PYTORCH + number_of_stars: 414 } repositories: { - url: "https://github.com/snow-mn/GAN-INT-CLS" - owner: "snow-mn" + url: "https://github.com/scrambleegg7/Text-to-Image-Synthesis2" + owner: "scrambleegg7" framework: FRAMEWORK_PYTORCH + description: "a.k.a StackGAN (Generative Adversarial Text-to-Image Synthesis paper) to emulate it with pytorch (convert python3.x)" } repositories: { - url: "https://github.com/Vigneshthanga/stackGAN-v2" - owner: "Vigneshthanga" + url: "https://github.com/vtddggg/BilinearGAN_for_LBIE" + owner: "vtddggg" framework: FRAMEWORK_PYTORCH - description: "StackGAN-V2 is one among the SOTA method in genrating HQ images from text " + number_of_stars: 19 + description: "Implementation of our paper [Bilinear Representation for Language-Based Image Editing using Conditional Generative Adversarial Networks](https://arxiv.org/abs/1903.07499) in ICASSP2019" } repositories: { - url: "https://github.com/hanzhanggit/StackGAN" - owner: "hanzhanggit" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1724 + url: "https://github.com/faizaan09/caption-to-image-generation" + owner: "faizaan09" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementation of \"Generative Adversarial Text-to-Image Synthesis\" from ICML 2016 http://arxiv.org/abs/1605.05396" } } papers: { @@ -43104,7 +43529,7 @@ pr_id_to_video: { url: "https://github.com/mehdidc/DALLE_clip_score" owner: "mehdidc" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 7 description: "Simple script to compute CLIP-based scores given a DALL-e trained model." } } @@ -43131,8 +43556,8 @@ pr_id_to_video: { video: { video_id: "az-OV47oKvA" video_title: "PR-301: Zero-Shot Text-to-Image Generation" - number_of_likes: 44 - number_of_views: 2834 + number_of_likes: 49 + number_of_views: 3344 published_date: { seconds: 1615131037 } @@ -43159,70 +43584,73 @@ pr_id_to_video: { authors: "Ravi Ramamoorthi" authors: "Ren Ng" repositories: { - url: "https://github.com/JulianKnodt/nerf_atlas" - owner: "JulianKnodt" + url: "https://github.com/yenchenlin/nerf-pytorch" + owner: "yenchenlin" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "A collection of NeRF extensions for fun and experimentation." + number_of_stars: 1181 + description: "A PyTorch implementation of NeRF (Neural Radiance Fields) that reproduces the results." 
} repositories: { - url: "https://github.com/kklemon/gon-pytorch" - owner: "kklemon" - framework: FRAMEWORK_PYTORCH - number_of_stars: 4 + is_official: true + url: "https://github.com/bmild/nerf" + owner: "bmild" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 3669 + description: "Code release for NeRF (Neural Radiance Fields)" } repositories: { - url: "https://github.com/computational-imaging/automatic-integration" - owner: "computational-imaging" - framework: FRAMEWORK_PYTORCH - number_of_stars: 76 - description: "Official repo for AutoInt: Automatic Integration for Fast Neural Volume Rendering in CVPR 2021" + url: "https://github.com/AnimatedRNG/nerf-jax" + owner: "AnimatedRNG" + framework: FRAMEWORK_OTHERS + number_of_stars: 2 + description: "A JAX rewrite of the NeRF reconstruction technique" } repositories: { - url: "https://github.com/matsuren/nerf_jax_flax_practice" - owner: "matsuren" + url: "https://github.com/ALBERT-Inc/blog_nerf" + owner: "ALBERT-Inc" framework: FRAMEWORK_OTHERS - description: "Practice repository for jax and flax implementing NeRF: Representing Scenes as Neural Radiance Fields " + number_of_stars: 23 + description: "NeRF再現実装公開用リポジトリ" } repositories: { - url: "https://github.com/facebookresearch/pytorch3d/tree/master/projects/nerf" - owner: "projects" + url: "https://github.com/krrish94/nerf-pytorch" + owner: "krrish94" framework: FRAMEWORK_PYTORCH - number_of_stars: 4981 - description: "PyTorch3D is FAIR's library of reusable components for deep learning with 3D data" + number_of_stars: 434 + description: "A PyTorch re-implementation of Neural Radiance Fields" } repositories: { - url: "https://github.com/facebookresearch/NSVF" - owner: "facebookresearch" + url: "https://github.com/kwea123/nerf_pl" + owner: "kwea123" framework: FRAMEWORK_PYTORCH - number_of_stars: 360 - description: "Open source code for the paper of Neural Sparse Voxel Fields." + number_of_stars: 586 + description: "NeRF (Neural Radiance Fields) and NeRF in the Wild using pytorch-lightning" } repositories: { - url: "https://github.com/myagues/flax_nerf" - owner: "myagues" - framework: FRAMEWORK_OTHERS - number_of_stars: 9 - description: "Unofficial implementation of NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis, using Flax with the Linen API" + url: "https://github.com/wandb/gallery" + owner: "wandb" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 190 + description: "This is a collection of the code that accompanies the reports in The Gallery by Weights & Biases." } repositories: { - url: "https://github.com/jingma-git/NeRF_Pytorch" - owner: "jingma-git" - framework: FRAMEWORK_PYTORCH + url: "https://github.com/BoyuanJackChen/NeRF-Implementation" + owner: "BoyuanJackChen" + framework: FRAMEWORK_OTHERS number_of_stars: 1 } repositories: { - url: "https://github.com/AnimatedRNG/nerf-jax" - owner: "AnimatedRNG" - framework: FRAMEWORK_OTHERS - number_of_stars: 2 - description: "A JAX rewrite of the NeRF reconstruction technique" + url: "https://github.com/yuehaowang/nerf" + owner: "yuehaowang" + framework: FRAMEWORK_PYTORCH + description: "My re-implementation of NeRF." } repositories: { - url: "https://github.com/komeiharada/learn_DL_together" - owner: "komeiharada" + url: "https://github.com/wandb/awesome-dl-projects" + owner: "wandb" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 2 + number_of_stars: 190 + description: "This is a collection of the code that accompanies the reports in The Gallery by Weights & Biases." 
} methods: { name: "NeRF" @@ -43265,7 +43693,7 @@ pr_id_to_video: { url: "https://github.com/albertpumarola/D-NeRF" owner: "albertpumarola" framework: FRAMEWORK_PYTORCH - number_of_stars: 52 + number_of_stars: 65 } methods: { name: "NeRF" @@ -43291,7 +43719,7 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/nonrigid_nerf" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 140 + number_of_stars: 151 description: "Open source repository for the code accompanying the paper 'Non-Rigid Neural Radiance Fields Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video'." } methods: { @@ -43317,7 +43745,7 @@ pr_id_to_video: { url: "https://github.com/Kai-46/nerfplusplus" owner: "Kai-46" framework: FRAMEWORK_PYTORCH - number_of_stars: 329 + number_of_stars: 344 description: "improves over nerf in 360 capture of unbounded scenes" } methods: { @@ -43354,19 +43782,19 @@ pr_id_to_video: { authors: "Vickie Ye" authors: "Matthew Tancik" authors: "Angjoo Kanazawa" - repositories: { - url: "https://github.com/arielbenitah/pixel-nerf" - owner: "arielbenitah" - framework: FRAMEWORK_PYTORCH - } repositories: { is_official: true url: "https://github.com/sxyu/pixel-nerf" owner: "sxyu" framework: FRAMEWORK_PYTORCH - number_of_stars: 451 + number_of_stars: 471 description: "PixelNeRF Official Repository" } + repositories: { + url: "https://github.com/arielbenitah/pixel-nerf" + owner: "arielbenitah" + framework: FRAMEWORK_PYTORCH + } methods: { name: "NeRF" full_name: "Neural Radiance Field" @@ -43394,8 +43822,8 @@ pr_id_to_video: { video: { video_id: "zkeh7Tt9tYQ" video_title: "PR-302: NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis" - number_of_likes: 27 - number_of_views: 1168 + number_of_likes: 33 + number_of_views: 1508 published_date: { seconds: 1615203796 } @@ -43483,21 +43911,21 @@ pr_id_to_video: { url: "https://github.com/prajjwal1/fluence" owner: "prajjwal1" framework: FRAMEWORK_PYTORCH - number_of_stars: 58 + number_of_stars: 59 description: "A deep learning library based on Pytorch focussed on low resource language research and robustness" } repositories: { url: "https://github.com/prajjwal1/adaptive_transformer" owner: "prajjwal1" framework: FRAMEWORK_PYTORCH - number_of_stars: 35 + number_of_stars: 37 description: "Code for the paper \"Adaptive Transformers for Learning Multimodal Representations\" (ACL SRW 2020)" } repositories: { url: "https://github.com/pytorch/fairseq" owner: "pytorch" framework: FRAMEWORK_PYTORCH - number_of_stars: 13300 + number_of_stars: 13559 description: "Facebook AI Research Sequence-to-Sequence Toolkit written in Python." } methods: { @@ -43574,7 +44002,7 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/CovidPrognosis" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 132 + number_of_stars: 133 description: "COVID deterioration prediction based on chest X-ray radiographs via MoCo-trained image representations" } } @@ -43595,7 +44023,7 @@ pr_id_to_video: { url: "https://github.com/kzl/universal-computation" owner: "kzl" framework: FRAMEWORK_PYTORCH - number_of_stars: 142 + number_of_stars: 153 description: "Official codebase for Pretrained Transformers as Universal Computation Engines." 
} methods: { @@ -43666,35 +44094,35 @@ pr_id_to_video: { url: "https://github.com/lucidrains/x-transformers" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 901 + number_of_stars: 992 description: "A simple but complete full-attention transformer with a set of promising experimental features from various papers" } repositories: { url: "https://github.com/lucidrains/memory-transformer-xl" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 16 + number_of_stars: 18 description: "A variant of Transformer-XL where the memory is updated not with a queue, but with attention" } repositories: { url: "https://github.com/lucidrains/reformer-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 1538 + number_of_stars: 1568 description: "Reformer, the efficient Transformer, in Pytorch" } repositories: { url: "https://github.com/facebookresearch/adaptive-span" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 554 + number_of_stars: 560 description: "Transformer training code for sequential tasks" } repositories: { url: "https://github.com/lucidrains/routing-transformer" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 182 + number_of_stars: 192 description: "Fully featured implementation of Routing Transformer" } methods: { @@ -43738,72 +44166,72 @@ pr_id_to_video: { authors: "Alexander Kirillov" authors: "Sergey Zagoruyko" repositories: { - url: "https://github.com/Leonardo-Blanger/detr_tensorflow" - owner: "Leonardo-Blanger" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 52 - description: "A Tensorflow implementation of the DETR object detection architecture." - } - repositories: { - url: "https://github.com/yhy258/DETR-For-Study" - owner: "yhy258" + url: "https://github.com/huggingface/transformers" + owner: "huggingface" framework: FRAMEWORK_PYTORCH + number_of_stars: 49984 + description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." 
} repositories: { - url: "https://github.com/DataXujing/TensorRT-DETR" - owner: "DataXujing" + url: "https://github.com/tahmid0007/DETR_FineTune" + owner: "tahmid0007" framework: FRAMEWORK_PYTORCH - number_of_stars: 16 - description: ":zap::zap::zap:NVIDIA-阿里2021 TRT比赛 `二等奖` 代码提交 团队:美迪康 AI Lab :rocket::rocket::rocket:" + number_of_stars: 6 + description: "A simple modification on the official DETR codebase with support to Finetune on custom dataset" } repositories: { - url: "https://github.com/mlpc-ucsd/PRTR" - owner: "mlpc-ucsd" - framework: FRAMEWORK_OTHERS - number_of_stars: 76 - description: "PRTR: Pose Recognition with Cascade Transformers" + is_official: true + url: "https://github.com/facebookresearch/detr" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7367 + description: "End-to-End Object Detection with Transformers" } repositories: { - url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" - owner: "KostadinovShalon" + url: "https://github.com/facebookresearch/swav" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 8 + number_of_stars: 1172 + description: "PyTorch implementation of SwAV https//arxiv.org/abs/2006.09882" } repositories: { - url: "https://github.com/EmGarr/kerod" - owner: "EmGarr" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 36 - description: "DETR - Faster RCNN implementation in tensorflow 2" + url: "https://github.com/ananyahjha93/swav" + owner: "ananyahjha93" + framework: FRAMEWORK_PYTORCH + description: "PyTorch implementation of SwAV https//arxiv.org/abs/2006.09882" } repositories: { - url: "https://github.com/open-mmlab/mmdetection" - owner: "open-mmlab" + url: "https://github.com/clive819/Modified-DETR" + owner: "clive819" framework: FRAMEWORK_PYTORCH - number_of_stars: 15628 - description: "OpenMMLab Detection Toolbox and Benchmark" + number_of_stars: 13 + description: "The PyTorch re-implement of the official DETR." } repositories: { - url: "https://github.com/Visual-Behavior/detr-tensorflow" - owner: "Visual-Behavior" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 64 - description: "Tensorflow implementation of DETR : Object Detection with Transformers" + url: "https://github.com/KostadinovShalon/UAVDetectionTrackingBenchmark" + owner: "KostadinovShalon" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 } repositories: { - url: "https://github.com/tahmid0007/DETR_FineTune" - owner: "tahmid0007" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "A simple modification on the official DETR codebase with support to Finetune on custom dataset" + url: "https://github.com/ywsyws/FormationSimplon20200622dlKeras" + owner: "ywsyws" + framework: FRAMEWORK_OTHERS } repositories: { url: "https://github.com/hsfzxjy/swavx" owner: "hsfzxjy" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 + number_of_stars: 2 description: "Extend SwAV support ResNet variants." 
} + repositories: { + url: "https://github.com/Visual-Behavior/detr-tensorflow" + owner: "Visual-Behavior" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 67 + description: "Tensorflow implementation of DETR : Object Detection with Transformers" + } methods: { name: "Detr" full_name: "Detection Transformer" @@ -43873,7 +44301,7 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/protein-ebm" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 63 + number_of_stars: 67 description: "Energy-based models for atomic-resolution protein conformations" } } @@ -43891,40 +44319,40 @@ pr_id_to_video: { authors: "Ludovic Denoyer" authors: "Hervé Jégou" repositories: { - url: "https://github.com/fshdnc/enfi-XLM" - owner: "fshdnc" + is_official: true + url: "https://github.com/facebookresearch/XLM" + owner: "facebookresearch" framework: FRAMEWORK_PYTORCH + number_of_stars: 2444 + description: "PyTorch original implementation of Cross-lingual Language Model Pretraining." } repositories: { - url: "https://github.com/duongkstn/XLM-duong" - owner: "duongkstn" + url: "https://github.com/deterministic-algorithms-lab/Large-XLM" + owner: "deterministic-algorithms-lab" framework: FRAMEWORK_PYTORCH - description: "XLM fork" + number_of_stars: 1 + description: "XLM implementation with utilities to process and train on large multi-lingual datasets, with not enough RAM. " } repositories: { - url: "https://github.com/kheeong/XLM_OWN" - owner: "kheeong" + url: "https://github.com/feyzaakyurek/XLM-LwLL" + owner: "feyzaakyurek" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/feyzaakyurek/XLM-LwLL" - owner: "feyzaakyurek" + url: "https://github.com/kheeong/XLM_OWN" + owner: "kheeong" framework: FRAMEWORK_PYTORCH } repositories: { - url: "https://github.com/deterministic-algorithms-lab/Large-XLM" - owner: "deterministic-algorithms-lab" + url: "https://github.com/duongkstn/XLM-duong" + owner: "duongkstn" framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "XLM implementation with utilities to process and train on large multi-lingual datasets, with not enough RAM. " + description: "XLM fork" } repositories: { - is_official: true - url: "https://github.com/facebookresearch/XLM" - owner: "facebookresearch" + url: "https://github.com/fshdnc/enfi-XLM" + owner: "fshdnc" framework: FRAMEWORK_PYTORCH - number_of_stars: 2422 - description: "PyTorch original implementation of Cross-lingual Language Model Pretraining." } methods: { name: "Residual Connection" @@ -43989,11 +44417,11 @@ pr_id_to_video: { authors: "Dmytro Okhonko" authors: "Luke Zettlemoyer" repositories: { - url: "https://github.com/sooftware/OpenSpeech" - owner: "sooftware" + url: "https://github.com/insop/pytorch-hackathon" + owner: "insop" framework: FRAMEWORK_PYTORCH - number_of_stars: 148 - description: "Open-Source Toolkit for End-to-End Speech Recognition leveraging PyTorch-Lightning and Hydra." + number_of_stars: 6 + description: "Pytorch Hackathon" } repositories: { url: "https://github.com/sooftware/Fairseq-Listen-Attend-Spell" @@ -44003,11 +44431,11 @@ pr_id_to_video: { description: "A Fairseq implementation of Listen, Attend and Spell (LAS), an End-to-End ASR framework." 
} repositories: { - url: "https://github.com/insop/pytorch-hackathon" - owner: "insop" + url: "https://github.com/sooftware/OpenSpeech" + owner: "sooftware" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Pytorch Hackathon" + number_of_stars: 10 + description: "Open-Source Toolkit for End-to-End Speech Recognition leveraging PyTorch-Lightning and Hydra." } methods: { name: "Residual Connection" @@ -44063,8 +44491,8 @@ pr_id_to_video: { video: { video_id: "NoW5th8H3Q4" video_title: "PR-303: Transformer is All You Need(Facebook AI Research)" - number_of_likes: 27 - number_of_views: 1198 + number_of_likes: 29 + number_of_views: 1324 published_date: { seconds: 1615780111 } @@ -44093,7 +44521,7 @@ pr_id_to_video: { url: "https://github.com/kzl/universal-computation" owner: "kzl" framework: FRAMEWORK_PYTORCH - number_of_stars: 142 + number_of_stars: 153 description: "Official codebase for Pretrained Transformers as Universal Computation Engines." } methods: { @@ -44291,13 +44719,14 @@ pr_id_to_video: { url: "https://github.com/nips-vit-fl-sub/ViT-FL-main" owner: "nips-vit-fl-sub" framework: FRAMEWORK_PYTORCH + number_of_stars: 1 } repositories: { is_official: true url: "https://github.com/Liangqiong/ViT-FL-main" owner: "Liangqiong" framework: FRAMEWORK_PYTORCH - number_of_stars: 9 + number_of_stars: 12 } } papers: { @@ -44313,26 +44742,26 @@ pr_id_to_video: { authors: "Róbert Csordás" authors: "Jürgen Schmidhuber" repositories: { - url: "https://github.com/IDSIA/lmtool-fwp" + is_official: true + url: "https://github.com/IDSIA/recurrent-fwp" owner: "IDSIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "PyTorch Language Modeling Toolkit for Fast Weight Programmers" + number_of_stars: 24 + description: "Official repository for the paper \"Going Beyond Linear Transformers with Recurrent Fast Weight Programmers\"" } repositories: { url: "https://github.com/IDSIA/lmtool-fwms" owner: "IDSIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 6 + number_of_stars: 7 description: "PyTorch Language Modeling Toolkit for Fast Weight Programmers" } repositories: { - is_official: true - url: "https://github.com/IDSIA/recurrent-fwp" + url: "https://github.com/IDSIA/lmtool-fwp" owner: "IDSIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 - description: "Official repository for the paper \"Going Beyond Linear Transformers with Recurrent Fast Weight Programmers\"" + number_of_stars: 7 + description: "PyTorch Language Modeling Toolkit for Fast Weight Programmers" } methods: { name: "Tanh Activation" @@ -44372,15 +44801,15 @@ pr_id_to_video: { url: "https://github.com/chenfengxu714/image2point" owner: "chenfengxu714" framework: FRAMEWORK_OTHERS - number_of_stars: 38 + number_of_stars: 43 description: "Official implementation of Image2Point." } } video: { video_id: "2rB5aTdRTJM" video_title: "PR-304: Pretrained Transformers As Universal Computation Engines" - number_of_likes: 33 - number_of_views: 1215 + number_of_likes: 34 + number_of_views: 1305 published_date: { seconds: 1615737825 } @@ -44402,28 +44831,13 @@ pr_id_to_video: { } authors: "Xinlei Chen" authors: "Kaiming He" - repositories: { - url: "https://github.com/lightly-ai/lightly" - owner: "lightly-ai" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1064 - description: "A python library for self-supervised learning on images." 
- } repositories: { url: "https://github.com/vturrisi/solo-learn" owner: "vturrisi" framework: FRAMEWORK_PYTORCH - number_of_stars: 47 + number_of_stars: 289 description: "solo-learn: a library of self-supervised methods for visual representation learning powered by Pytorch Lightning" } - repositories: { - is_official: true - url: "https://github.com/facebookresearch/simsiam" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 240 - description: "PyTorch implementation of SimSiam https//arxiv.org/abs/2011.10566" - } repositories: { url: "https://github.com/ahmdtaha/simsiam" owner: "ahmdtaha" @@ -44432,46 +44846,61 @@ pr_id_to_video: { description: "Pytorch implementation of Exploring Simple Siamese Representation Learning" } repositories: { - url: "https://github.com/IgorSusmelj/simsiam-cifar10" - owner: "IgorSusmelj" + url: "https://github.com/sayakpaul/SimSiam-TF" + owner: "sayakpaul" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 89 + description: "Minimal implementation of SimSiam (https://arxiv.org/abs/2011.10566) in TensorFlow 2." + } + repositories: { + url: "https://github.com/juneweng/byol-pytorch" + owner: "juneweng" framework: FRAMEWORK_PYTORCH - number_of_stars: 28 - description: "Code to train the SimSiam model on cifar10 using PyTorch" + number_of_stars: 1 + description: "use cifar10 dataset to run byol, refer to lucidrains" } repositories: { - url: "https://github.com/taoyang1122/pytorch-SimSiam" - owner: "taoyang1122" + url: "https://github.com/PatrickHua/SimSiam" + owner: "PatrickHua" framework: FRAMEWORK_PYTORCH - number_of_stars: 63 - description: "A PyTorch re-implementation of the paper 'Exploring Simple Siamese Representation Learning'. Reproduced the 67.8% Top1 Acc on ImageNet." + number_of_stars: 547 + description: "A pytorch implementation for paper 'Exploring Simple Siamese Representation Learning'" + } + repositories: { + url: "https://github.com/leaderj1001/SimSiam" + owner: "leaderj1001" + framework: FRAMEWORK_PYTORCH + number_of_stars: 45 + description: "Exploring Simple Siamese Representation Learning" } repositories: { url: "https://github.com/PaperCodeReview/SimSiam-TF" owner: "PaperCodeReview" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 + number_of_stars: 10 description: "TF 2.x implementation of SimSiam (Exploring Simple Siamese Representation Learning, CVPR 2021)" } repositories: { url: "https://github.com/Reza-Safdari/SimSiam" owner: "Reza-Safdari" framework: FRAMEWORK_PYTORCH - number_of_stars: 12 + number_of_stars: 13 description: "Pytorch implementation of the paper Exploring Simple Siamese Representation Learning." } repositories: { - url: "https://github.com/leaderj1001/SimSiam" - owner: "leaderj1001" + url: "https://github.com/taoyang1122/pytorch-SimSiam" + owner: "taoyang1122" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "Exploring Simple Siamese Representation Learning" + number_of_stars: 65 + description: "A PyTorch re-implementation of the paper 'Exploring Simple Siamese Representation Learning'. Reproduced the 67.8% Top1 Acc on ImageNet." } repositories: { - url: "https://github.com/sayakpaul/SimSiam-TF" - owner: "sayakpaul" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 88 - description: "Minimal implementation of SimSiam (https://arxiv.org/abs/2011.10566) in TensorFlow 2." 
+ is_official: true + url: "https://github.com/facebookresearch/simsiam" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 317 + description: "PyTorch implementation of SimSiam https://arxiv.org/abs/2011.10566" } } papers: { @@ -44501,7 +44930,7 @@ pr_id_to_video: { url: "https://github.com/CupidJay/Scaled-down-self-supervised-learning" owner: "CupidJay" framework: FRAMEWORK_PYTORCH - number_of_stars: 15 + number_of_stars: 16 description: "official pytorch implementation of Rethining Self-supervised Learning: Small is Beautiful." } } @@ -44550,7 +44979,7 @@ pr_id_to_video: { url: "https://github.com/yaohungt/Barlow-Twins-HSIC" owner: "yaohungt" framework: FRAMEWORK_PYTORCH - number_of_stars: 29 + number_of_stars: 34 } } papers: { @@ -44595,8 +45024,8 @@ pr_id_to_video: { video: { video_id: "Z1Os54oND8s" video_title: "PR-305: Exploring Simple Siamese Representation Learning" - number_of_likes: 19 - number_of_views: 795 + number_of_likes: 25 + number_of_views: 984 published_date: { seconds: 1616343171 } @@ -44621,16 +45050,11 @@ pr_id_to_video: { authors: "Marcus Rohrbach" authors: "Devi Parikh" authors: "Stefan Lee" - repositories: { - url: "https://github.com/johntiger1/multitask_multimodal" - owner: "johntiger1" - framework: FRAMEWORK_PYTORCH - } repositories: { url: "https://github.com/Cloud-CV/vilbert-multi-task" owner: "Cloud-CV" framework: FRAMEWORK_OTHERS - number_of_stars: 21 + number_of_stars: 22 description: ":eyes: :speaking_head: :memo:12-in-1: Multi-Task Vision and Language Representation Learning Web Demo" } repositories: { @@ -44642,16 +45066,27 @@ pr_id_to_video: { url: "https://github.com/jiasenlu/vilbert_beta" owner: "jiasenlu" framework: FRAMEWORK_PYTORCH - number_of_stars: 392 + number_of_stars: 396 } repositories: { is_official: true url: "https://github.com/facebookresearch/vilbert-multi-task" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 516 + number_of_stars: 536 description: "Multi Task Vision and Language" } + repositories: { + url: "https://github.com/johntiger1/multitask_multimodal" + owner: "johntiger1" + framework: FRAMEWORK_PYTORCH + } + repositories: { + url: "https://github.com/dw-dengwei/vilbert" + owner: "dw-dengwei" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1 + } } papers: { paper_id: "towards-general-purpose-vision-systems" @@ -44669,7 +45104,7 @@ pr_id_to_video: { url: "https://github.com/allenai/gpv-1" owner: "allenai" framework: FRAMEWORK_PYTORCH - number_of_stars: 20 + number_of_stars: 36 description: "A task-agnostic vision-language architecture as a step towards General Purpose Vision" } } @@ -44766,7 +45201,7 @@ pr_id_to_video: { url: "https://github.com/zhegan27/VILLA" owner: "zhegan27" framework: FRAMEWORK_PYTORCH - number_of_stars: 78 + number_of_stars: 82 description: "Research Code for NeurIPS 2020 Spotlight paper \"Large-Scale Adversarial Training for Vision-and-Language Representation Learning\": UNITER adversarial training part" } } @@ -44791,7 +45226,7 @@ pr_id_to_video: { url: "https://github.com/mczhuge/Kaleido-BERT" owner: "mczhuge" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 62 + number_of_stars: 68 description: "(CVPR2021) Kaleido-BERT: Vision-Language Pre-training on Fashion Domain" } methods: { @@ -44858,55 +45293,55 @@ pr_id_to_video: { authors: "Y-Lan Boureau" authors: "Jason Weston" methods: { - name: "BPE" - full_name: "Byte Pair Encoding" - description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown 
words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. 
Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." } methods: { name: "Transformer" full_name: "Transformer" description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } methods: { name: "ResNeXt Block" full_name: "ResNeXt Block" description: "A **ResNeXt Block** is a type of residual block used as part of the [ResNeXt](https://paperswithcode.com/method/resnext) CNN architecture. It uses a \"split-transform-merge\" strategy (branched paths within a single module) similar to an [Inception module](https://paperswithcode.com/method/inception-module), i.e. it aggregates a set of transformations. Compared to a Residual Block, it exposes a new dimension, *cardinality* (size of set of transformations) $C$, as an essential factor in addition to depth and width. \r\n\r\nFormally, a set of aggregated transformations can be represented as: $\\mathcal{F}(x)=\\sum_{i=1}^{C}\\mathcal{T}_i(x)$, where $\\mathcal{T}_i(x)$ can be an arbitrary function. Analogous to a simple neuron, $\\mathcal{T}_i$ should project $x$ into an (optionally low-dimensional) embedding and then transform it." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." - } - methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." 
- } - methods: { - name: "Kaiming Initialization" - full_name: "Kaiming Initialization" - description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." + name: "Grouped Convolution" + full_name: "Grouped Convolution" + description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." } methods: { - name: "1x1 Convolution" - full_name: "1x1 Convolution" - description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { name: "Layer Normalization" full_name: "Layer Normalization" description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. 
It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } - methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } } papers: { paper_id: "behind-the-scene-revealing-the-secrets-of-pre" @@ -44948,21 +45383,21 @@ pr_id_to_video: { authors: "Furu Wei" authors: "Yejin Choi" authors: "Jianfeng Gao" + repositories: { + url: "https://github.com/ThanThoai/Visual-Question-Answering_Vietnamese" + owner: "ThanThoai" + framework: FRAMEWORK_PYTORCH + number_of_stars: 8 + description: "Visual question answering vietnamese" + } repositories: { is_official: true url: "https://github.com/microsoft/Oscar" owner: "microsoft" framework: FRAMEWORK_PYTORCH - number_of_stars: 564 + number_of_stars: 613 description: "Oscar and VinVL" } - repositories: { - url: "https://github.com/ThanThoai/Visual-Question-Answering_Vietnamese" - owner: "ThanThoai" - framework: FRAMEWORK_PYTORCH - number_of_stars: 9 - description: "Visual question answering vietnamese" - } } papers: { paper_id: "unifying-vision-and-language-tasks-via-text" @@ -44981,7 +45416,7 @@ pr_id_to_video: { url: "https://github.com/j-min/VL-T5" owner: "j-min" framework: FRAMEWORK_PYTORCH - number_of_stars: 82 + number_of_stars: 99 description: "PyTorch code for \"Unifying Vision-and-Language Tasks via Text Generation\" (ICML 2021)" } } @@ -44989,7 +45424,7 @@ pr_id_to_video: { video_id: "6K1-kmMF_D8" video_title: "PR-306: 12-in-1: Multi-Task Vision and Language Representation Learning" number_of_likes: 8 - number_of_views: 282 + number_of_views: 315 published_date: { seconds: 1616626936 } @@ -45018,7 +45453,7 @@ pr_id_to_video: { url: "https://github.com/google-research/google-research" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 18411 + number_of_stars: 18790 description: "Google Research" } } @@ -45052,6 +45487,13 @@ pr_id_to_video: { authors: "Ludovic Denoyer" authors: "Marc'Aurelio Ranzato" authors: "Y-Lan 
Boureau" + repositories: { + url: "https://github.com/facebookresearch/MultipleAttributeTextRewriting" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 62 + description: "This paper shows how to rewrite text while controlling multiple attributes using techniques from back-translation." + } repositories: { url: "https://github.com/clock-me/text-restyle" owner: "clock-me" @@ -45059,13 +45501,6 @@ pr_id_to_video: { number_of_stars: 3 description: "This repo contains pet-project re-implementation of paper Multiple-Attribute Text Style Transfer." } - repositories: { - url: "https://github.com/facebookresearch/MultipleAttributeTextRewriting" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 59 - description: "This paper shows how to rewrite text while controlling multiple attributes using techniques from back-translation." - } } papers: { paper_id: "meta-learning-for-knowledge-distillation" @@ -45083,7 +45518,7 @@ pr_id_to_video: { url: "https://github.com/JetRunner/MetaDistil" owner: "JetRunner" framework: FRAMEWORK_OTHERS - number_of_stars: 10 + number_of_stars: 13 description: "Code for the paper \"Meta Learning for Knowledge Distillation\"." } } @@ -45135,7 +45570,7 @@ pr_id_to_video: { url: "https://github.com/linjx-ustc1106/MT-GAN-PyTorch" owner: "linjx-ustc1106" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 + number_of_stars: 25 description: "PyTorch Implementation of \"Learning to Transfer: Unsupervised Domain Translation via Meta-Learning\" " } methods: { @@ -45173,8 +45608,8 @@ pr_id_to_video: { video: { video_id: "MsAAo1TJJhs" video_title: "PR-307: Meta Back Translation" - number_of_likes: 1 - number_of_views: 170 + number_of_likes: 2 + number_of_views: 199 published_date: { seconds: 1616982331 } @@ -45234,7 +45669,7 @@ pr_id_to_video: { url: "https://github.com/wenhuchen/Meta-Module-Network" owner: "wenhuchen" framework: FRAMEWORK_PYTORCH - number_of_stars: 23 + number_of_stars: 24 description: "Code for WACV 2021 Paper \"Meta Module Network for Compositional Visual Reasoning\"" } } @@ -45275,7 +45710,7 @@ pr_id_to_video: { url: "https://github.com/mesnico/TERN" owner: "mesnico" framework: FRAMEWORK_PYTORCH - number_of_stars: 35 + number_of_stars: 37 description: "Code and Resources for the Transformer Encoder Reasoning Network (TERN) - https://arxiv.org/abs/2004.09144" } methods: { @@ -45347,7 +45782,7 @@ pr_id_to_video: { url: "https://github.com/KunpengLi1994/VSRN" owner: "KunpengLi1994" framework: FRAMEWORK_PYTORCH - number_of_stars: 197 + number_of_stars: 196 description: "PyTorch code for ICCV'19 paper \"Visual Semantic Reasoning for Image-Text Matching\"" } methods: { @@ -45370,12 +45805,17 @@ pr_id_to_video: { authors: "Kwan-Yee Kenneth Wong" authors: "Joshua B. Tenenbaum" authors: "Chuang Gan" + methods: { + name: "RUN" + full_name: "Rung Kutta optimization" + description: "The optimization field suffers from the metaphor-based “pseudo-novel” or “fancy” optimizers. Most of these cliché methods mimic animals' searching trends and possess a small contribution to the optimization process itself. Most of these cliché methods suffer from the locally efficient performance, biased verification methods on easy problems, and high similarity between their components' interactions. This study attempts to go beyond the traps of metaphors and introduce a novel metaphor-free population-based optimization method based on the mathematical foundations and ideas of the Runge Kutta (RK) method widely well-known in mathematics. 
The proposed RUNge Kutta optimizer (RUN) was developed to deal with various types of optimization problems in the future. The RUN utilizes the logic of slope variations computed by the RK method as a promising and logical searching mechanism for global optimization. This search mechanism benefits from two active exploration and exploitation phases for exploring the promising regions in the feature space and constructive movement toward the global best solution. Furthermore, an enhanced solution quality (ESQ) mechanism is employed to avoid the local optimal solutions and increase convergence speed. The RUN algorithm's efficiency was evaluated by comparing with other metaheuristic algorithms in 50 mathematical test functions and four real-world engineering problems. The RUN provided very promising and competitive results, showing superior exploration and exploitation tendencies, fast convergence rate, and local optima avoidance. In optimizing the constrained engineering problems, the metaphor-free RUN demonstrated its suitable performance as well. The authors invite the community for extensive evaluations of this deep-rooted optimizer as a promising tool for real-world optimization" + } } video: { video_id: "yaJYIEgqC4Q" video_title: "PR-308: Visual Concept Reasoning Networks" - number_of_likes: 4 - number_of_views: 347 + number_of_likes: 6 + number_of_views: 399 published_date: { seconds: 1616951139 } @@ -45402,21 +45842,21 @@ pr_id_to_video: { url: "https://github.com/lab-ml/nn/tree/master/labml_nn/transformers/switch" owner: "transformers" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + number_of_stars: 3494 + description: "🧑‍🏫 Implementations/tutorials of deep learning papers with side-by-side notes 📝; including transformers (original, xl, switch, feedback, vit), optimizers (adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), 🎮 reinforcement learning (ppo, dqn), capsnet, distillation, etc. 🧠" } repositories: { is_official: true url: "https://github.com/tensorflow/mesh" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 1064 + number_of_stars: 1086 description: "Mesh TensorFlow: Model Parallelism Made Easier" } } papers: { paper_id: "exploring-sparse-expert-models-and-beyond" - title: "Exploring Sparse Expert Models and Beyond" + title: "M6-T: Exploring Sparse Expert Models and Beyond" arxiv_id: "2105.15082" abstract: "Mixture-of-Experts (MoE) models can achieve promising results with outrageous large amount of parameters but constant computation cost, and thus it has become a trend in model scaling. Still it is a mystery how MoE layers bring quality gains by leveraging the parameters with sparse activation. In this work, we investigate several key factors in sparse expert models. We observe that load imbalance may not be a significant problem affecting model quality, contrary to the perspectives of recent studies, while the number of sparsely activated experts $k$ and expert capacity $C$ in top-$k$ routing can significantly make a difference in this context. Furthermore, we take a step forward to propose a simple method called expert prototyping that splits experts into different prototypes and applies $k$ top-$1$ routing. 
This strategy improves the model quality but maintains constant computational costs, and our further exploration on extremely large-scale models reflects that it is more effective in training larger models. We push the model scale to over $1$ trillion parameters and implement it on solely $480$ NVIDIA V100-32GB GPUs, in comparison with the recent SOTAs on $2048$ TPU cores. The proposed giant model achieves substantial speedup in convergence over the same-size baseline." published_date: { @@ -45507,7 +45947,7 @@ pr_id_to_video: { url: "https://github.com/laekov/fastmoe" owner: "laekov" framework: FRAMEWORK_PYTORCH - number_of_stars: 465 + number_of_stars: 494 description: "A fast MoE impl for PyTorch" } methods: { @@ -45556,9 +45996,9 @@ pr_id_to_video: { description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." } methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "FastMoE" + full_name: "FastMoE" + description: "**FastMoE** is a distributed MoE training system based on PyTorch with common accelerators. The system provides a hierarchical interface for both flexible model design and adaptation to different applications, such as Transformer-XL and Megatron-LM." } } papers: { @@ -45674,15 +46114,15 @@ pr_id_to_video: { url: "https://github.com/tensorflow/neural-structured-learning" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 835 + number_of_stars: 846 description: "Training neural models with structured signals." } } papers: { paper_id: "efficient-large-scale-language-model-training" - title: "Efficient Large-Scale Language Model Training on GPU Clusters" + title: "Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM" arxiv_id: "2104.04473" - abstract: "Large language models have led to state-of-the-art accuracies across a range of tasks. However, training these large models efficiently is challenging for two reasons: a) GPU memory capacity is limited, making it impossible to fit large models on a single GPU or even on a multi-GPU server; and b) the number of compute operations required to train these models can result in unrealistically long training times. New methods of model parallelism such as tensor and pipeline parallelism have been proposed to address these challenges. Unfortunately, naive usage leads to fundamental scaling issues at thousands of GPUs due to various reasons, e.g., expensive cross-node communication or idle periods waiting on other devices. 
In this work, we show how to compose different types of parallelism methods (tensor, pipeline, and data parallelism) to scale to thousands of GPUs, achieving a two-order-of-magnitude increase in the sizes of models we can efficiently train compared to existing systems. We survey techniques for pipeline parallelism and propose a novel interleaved pipeline parallelism schedule that can improve throughput by more than 10% with comparable memory footprint compared to previously-proposed approaches. We quantitatively study the trade-offs between tensor, pipeline, and data parallelism, and provide intuition as to how to configure distributed training of a large model. Our approach allows us to perform training iterations on a model with 1 trillion parameters at 502 petaFLOP/s on 3072 GPUs with achieved per-GPU throughput of 52% of peak; previous efforts to train similar-sized models achieve much lower throughput (36% of theoretical peak). Our code is open sourced at https://github.com/nvidia/megatron-lm." + abstract: "Large language models have led to state-of-the-art accuracies across a range of tasks. However, training these models efficiently is challenging for two reasons: a) GPU memory capacity is limited, making it impossible to fit large models on even a multi-GPU server, and b) the number of compute operations required to train these models can result in unrealistically long training times. Consequently, new methods of model parallelism such as tensor and pipeline parallelism have been proposed. Unfortunately, naive usage of these methods leads to fundamental scaling issues at thousands of GPUs, e.g., due to expensive cross-node communication or devices spending significant time waiting on other devices to make progress. In this paper, we show how different types of parallelism methods (tensor, pipeline, and data parallelism) can be composed to scale to thousands of GPUs and models with trillions of parameters. We survey techniques for pipeline parallelism and propose a novel interleaved pipeline parallelism schedule that can improve throughput by 10+% with memory footprint comparable to existing approaches. We quantitatively study the trade-offs between tensor, pipeline, and data parallelism, and provide intuition as to how to configure distributed training of a large model. Our approach allows us to perform training iterations on a model with 1 trillion parameters at 502 petaFLOP/s on 3072 GPUs with achieved per-GPU throughput of 52% of theoretical peak. Our code is open sourced at https://github.com/nvidia/megatron-lm." published_date: { seconds: 1617926400 } @@ -45703,7 +46143,7 @@ pr_id_to_video: { url: "https://github.com/NVIDIA/Megatron-LM" owner: "NVIDIA" framework: FRAMEWORK_PYTORCH - number_of_stars: 2214 + number_of_stars: 2284 description: "Ongoing research training transformer language models at scale, including: BERT & GPT-2" } } @@ -45724,20 +46164,20 @@ pr_id_to_video: { authors: "Maxim Krikun" authors: "Noam Shazeer" authors: "Zhifeng Chen" - repositories: { - url: "https://github.com/facebookresearch/fairscale" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1251 - description: "PyTorch extensions for high performance and large scale training." 
- } repositories: { url: "https://github.com/lucidrains/mixture-of-experts" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 91 + number_of_stars: 94 description: "A Pytorch implementation of Sparsely-Gated Mixture of Experts, for massively increasing the parameter count of language models" } + repositories: { + url: "https://github.com/facebookresearch/fairscale" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1307 + description: "PyTorch extensions for high performance and large scale training." + } methods: { name: "Residual Connection" full_name: "Residual Connection" @@ -45773,6 +46213,11 @@ pr_id_to_video: { full_name: "Softmax" description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } + methods: { + name: "GShard" + full_name: "GShard" + description: "**GShard** is an intra-layer parallel distributed method. It consists of a set of simple APIs for annotations, and a compiler extension in XLA for automatic parallelization." + } methods: { name: "ReLU" full_name: "Rectified Linear Units" @@ -45783,17 +46228,12 @@ pr_id_to_video: { full_name: "Adam" description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." } - methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" - } } video: { video_id: "cy1q90Uyi48" video_title: "PR-309: Switch Transformers: Scaling To Trillion Parameter Models WithSimple And Efficient Sparsity" - number_of_likes: 6 - number_of_views: 439 + number_of_likes: 8 + number_of_views: 526 published_date: { seconds: 1617546644 } @@ -45829,13 +46269,18 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/vissl" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1788 + number_of_stars: 1878 description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." } methods: { - name: "MoCo" - full_name: "Momentum Contrast" - description: "**MoCo**, or **Momentum Contrast**, is a self-supervised learning algorithm with a contrastive loss. \r\n\r\nContrastive loss methods can be thought of as building dynamic dictionaries. The \"keys\" (tokens) in the dictionary are sampled from data (e.g., images or patches) and are represented by an encoder network. Unsupervised learning trains encoders to perform dictionary look-up: an encoded “query” should be similar to its matching key and dissimilar to others. Learning is formulated as minimizing a contrastive loss. \r\n\r\nMoCo can be viewed as a way to build large and consistent dictionaries for unsupervised learning with a contrastive loss. In MoCo, we maintain the dictionary as a queue of data samples: the encoded representations of the current mini-batch are enqueued, and the oldest are dequeued. The queue decouples the dictionary size from the mini-batch size, allowing it to be large. Moreover, as the dictionary keys come from the preceding several mini-batches, a slowly progressing key encoder, implemented as a momentum-based moving average of the query encoder, is proposed to maintain consistency." + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
+ } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." } methods: { name: "LARS" @@ -45843,51 +46288,46 @@ pr_id_to_video: { description: "**Layer-wise Adaptive Rate Scaling**, or **LARS**, is a large batch optimization technique. There are two notable differences between LARS and other adaptive algorithms such as [Adam](https://paperswithcode.com/method/adam) or [RMSProp](https://paperswithcode.com/method/rmsprop): first, LARS uses a separate learning rate for each layer and not for each weight. And second, the magnitude of the update is controlled with respect to the weight norm for better control of training speed.\r\n\r\n$$m\\_{t} = \\beta\\_{1}m\\_{t-1} + \\left(1-\\beta\\_{1}\\right)\\left(g\\_{t} + \\lambda{x\\_{t}}\\right)$$\r\n$$x\\_{t+1}^{\\left(i\\right)} = x\\_{t}^{\\left(i\\right)} - \\eta\\_{t}\\frac{\\phi\\left(|| x\\_{t}^{\\left(i\\right)} ||\\right)}{|| m\\_{t}^{\\left(i\\right)} || }m\\_{t}^{\\left(i\\right)} $$" } methods: { - name: "Average Pooling" - full_name: "Average Pooling" - description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" } methods: { - name: "RegNetY" - full_name: "RegNetY" - description: "**RegNetY** is a convolutional network design space with simple, regular models with parameters: depth $d$, initial width $w\\_{0} > 0$, and slope $w\\_{a} > 0$, and generates a different block width $u\\_{j}$ for each block $j < d$. The key restriction for the RegNet types of model is that there is a linear parameterisation of block widths (the design space only contains models with this linear structure):\r\n\r\n$$ u\\_{j} = w\\_{0} + w\\_{a}\\cdot{j} $$\r\n\r\nFor **RegNetX** we have additional restrictions: we set $b = 1$ (the bottleneck ratio), $12 \\leq d \\leq 28$, and $w\\_{m} \\geq 2$ (the width multiplier).\r\n\r\nFor **RegNetY** we make one change, which is to include Squeeze-and-Excitation blocks." + name: "Grouped Convolution" + full_name: "Grouped Convolution" + description: "A **Grouped Convolution** uses a group of convolutions - multiple kernels per layer - resulting in multiple channel outputs per layer. This leads to wider networks helping a network learn a varied set of low level and high level features. The original motivation of using Grouped Convolutions in [AlexNet](https://paperswithcode.com/method/alexnet) was to distribute the model over multiple GPUs as an engineering compromise. But later, with models such as [ResNeXt](https://paperswithcode.com/method/alexnet), it was shown this module could be used to improve classification accuracy. Specifically by exposing a new dimension through grouped convolutions, *cardinality* (the size of set of transformations), we can increase accuracy by increasing it." } methods: { - name: "Global Average Pooling" - full_name: "Global Average Pooling" - description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. 
Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." } methods: { - name: "InfoNCE" - full_name: "InfoNCE" - description: "**InfoNCE**, where NCE stands for Noise-Contrastive Estimation, is a type of contrastive loss function used for [self-supervised learning](https://paperswithcode.com/methods/category/self-supervised-learning).\r\n\r\nGiven a set $X = ${$x\\_{1}, \\dots, x\\_{N}$} of $N$ random samples containing one positive sample from $p\\left(x\\_{t+k}|c\\_{t}\\right)$ and $N − 1$ negative samples from the 'proposal' distribution $p\\left(x\\_{t+k}\\right)$, we optimize:\r\n\r\n$$ \\mathcal{L}\\_{N} = - \\mathbb{E}\\_{X}\\left[\\log\\frac{f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right)}{\\sum\\_{x\\_{j}\\in{X}}f\\_{k}\\left(x\\_{j}, c\\_{t}\\right)}\\right] $$\r\n\r\nOptimizing this loss will result in $f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right)$ estimating the density ratio, which is:\r\n\r\n$$ f\\_{k}\\left(x\\_{t+k}, c\\_{t}\\right) \\propto \\frac{p\\left(x\\_{t+k}|c\\_{t}\\right)}{p\\left(x\\_{t+k}\\right)} $$" + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { - name: "ColorJitter" - full_name: "Color Jitter" - description: "**ColorJitter** is a type of image data augmentation where we randomly change the brightness, contrast and saturation of an image.\r\n\r\nImage Credit: [Apache MXNet](https://mxnet.apache.org/versions/1.5.0/tutorials/gluon/data_augmentation.html)" + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" } methods: { - name: "Bottleneck Residual Block" - full_name: "Bottleneck Residual Block" - description: "A **Bottleneck Residual Block** is a variant of the [residual block](https://paperswithcode.com/method/residual-block) that utilises 1x1 convolutions to create a bottleneck. The use of a bottleneck reduces the number of parameters and matrix multiplications. The idea is to make residual blocks as thin as possible to increase depth and have less parameters. They were introduced as part of the [ResNet](https://paperswithcode.com/method/resnet) architecture, and are used as part of deeper ResNets such as ResNet-50 and ResNet-101." + name: "Squeeze-and-Excitation Block" + full_name: "Squeeze-and-Excitation Block" + description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." 
+ name: "Kaiming Initialization" + full_name: "Kaiming Initialization" + description: "**Kaiming Initialization**, or **He Initialization**, is an initialization method for neural networks that takes into account the non-linearity of activation functions, such as ReLU activations.\r\n\r\nA proper initialization method should avoid reducing or magnifying the magnitudes of input signals exponentially. Using a derivation they work out that the condition to stop this happening is:\r\n\r\n$$\\frac{1}{2}n\\_{l}\\text{Var}\\left[w\\_{l}\\right] = 1 $$\r\n\r\nThis implies an initialization scheme of:\r\n\r\n$$ w\\_{l} \\sim \\mathcal{N}\\left(0, 2/n\\_{l}\\right)$$\r\n\r\nThat is, a zero-centered Gaussian with standard deviation of $\\sqrt{2/{n}\\_{l}}$ (variance shown in equation above). Biases are initialized at $0$." } } papers: { paper_id: "efficient-visual-pretraining-with-contrastive" title: "Efficient Visual Pretraining with Contrastive Detection" arxiv_id: "2103.10957" - abstract: "Self-supervised pretraining has been shown to yield powerful representations for transfer learning. These performance gains come at a large computational cost however, with state-of-the-art methods requiring an order of magnitude more computation than supervised pretraining. We tackle this computational bottleneck by introducing a new self-supervised objective, contrastive detection, which tasks representations with identifying object-level features across augmentations. This objective extracts a rich learning signal per image, leading to state-of-the-art transfer performance from ImageNet to COCO, while requiring up to 5x less pretraining. In particular, our strongest ImageNet-pretrained model performs on par with SEER, one of the largest self-supervised systems to date, which uses 1000x more pretraining data. Finally, our objective seamlessly handles pretraining on more complex images such as those in COCO, closing the gap with supervised transfer learning from COCO to PASCAL." + abstract: "Self-supervised pretraining has been shown to yield powerful representations for transfer learning. These performance gains come at a large computational cost however, with state-of-the-art methods requiring an order of magnitude more computation than supervised pretraining. We tackle this computational bottleneck by introducing a new self-supervised objective, contrastive detection, which tasks representations with identifying object-level features across augmentations. This objective extracts a rich learning signal per image, leading to state-of-the-art transfer accuracy on a variety of downstream tasks, while requiring up to 10x less pretraining. In particular, our strongest ImageNet-pretrained model performs on par with SEER, one of the largest self-supervised systems to date, which uses 1000x more pretraining data. Finally, our objective seamlessly handles pretraining on more complex images such as those in COCO, closing the gap with supervised transfer learning from COCO to PASCAL." published_date: { seconds: 1616112000 } @@ -45979,6 +46419,7 @@ pr_id_to_video: { owner: "UMBCvision" framework: FRAMEWORK_PYTORCH number_of_stars: 9 + description: "Official implementation of the paper \"Backdoor Attacks on Self-Supervised Learning\"." 
} } papers: { @@ -46010,8 +46451,8 @@ pr_id_to_video: { video: { video_id: "fvecOZnGV6Y" video_title: "PR-310: Self-supervised Pretraining of Visual Features in the Wild" - number_of_likes: 17 - number_of_views: 752 + number_of_likes: 19 + number_of_views: 852 published_date: { seconds: 1617549074 } @@ -46096,7 +46537,7 @@ pr_id_to_video: { url: "https://github.com/deepmind/lab" owner: "deepmind" framework: FRAMEWORK_OTHERS - number_of_stars: 6493 + number_of_stars: 6514 description: "A customisable 3D platform for agent-based AI research" } } @@ -46123,7 +46564,7 @@ pr_id_to_video: { url: "https://github.com/deepmind/streetlearn" owner: "deepmind" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 220 + number_of_stars: 223 description: "A C++/Python implementation of the StreetLearn environment based on images from Street View, as well as a TensorFlow implementation of goal-driven navigation agents solving the task published in “Learning to Navigate in Cities Without a Map”, NeurIPS 2018" } repositories: { @@ -46187,7 +46628,7 @@ pr_id_to_video: { url: "https://github.com/gkahn13/LaND" owner: "gkahn13" framework: FRAMEWORK_OTHERS - number_of_stars: 26 + number_of_stars: 27 } } papers: { @@ -46215,7 +46656,7 @@ pr_id_to_video: { url: "https://github.com/99andBeyond/Apollo1060" owner: "99andBeyond" framework: FRAMEWORK_OTHERS - number_of_stars: 47 + number_of_stars: 50 description: "Data and models (with prediction scripts) used in publications related to Apollo1060 platform" } } @@ -46234,7 +46675,7 @@ pr_id_to_video: { video_id: "5RfVz-oj15k" video_title: "PR-311: Learning to Navigate the Web" number_of_likes: 3 - number_of_views: 150 + number_of_views: 164 published_date: { seconds: 1618155046 } @@ -46262,7 +46703,7 @@ pr_id_to_video: { url: "https://github.com/EmilienDupont/neural-function-distributions" owner: "EmilienDupont" framework: FRAMEWORK_PYTORCH - number_of_stars: 68 + number_of_stars: 72 description: "Pytorch implementation of Generative Models as Distributions of Functions 🌿" } } @@ -46393,8 +46834,8 @@ pr_id_to_video: { video: { video_id: "t2oyFXPLUwU" video_title: "PR-312: Generative Models as Distributions of Functions" - number_of_likes: 36 - number_of_views: 1067 + number_of_likes: 39 + number_of_views: 1194 published_date: { seconds: 1618155529 } @@ -46421,28 +46862,28 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/open_lth" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 427 + number_of_stars: 440 description: "A repository in preparation for open-sourcing lottery ticket hypothesis code." } repositories: { url: "https://github.com/sayakpaul/Training-BatchNorm-and-Only-BatchNorm" owner: "sayakpaul" framework: FRAMEWORK_OTHERS - number_of_stars: 21 + number_of_stars: 22 description: "Experiments with the ideas presented in https://arxiv.org/abs/2003.00152 by Frankle et al. " } repositories: { url: "https://github.com/wandb/gallery" owner: "wandb" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 186 + number_of_stars: 190 description: "This is a collection of the code that accompanies the reports in The Gallery by Weights & Biases." } repositories: { url: "https://github.com/wandb/awesome-dl-projects" owner: "wandb" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 186 + number_of_stars: 190 description: "This is a collection of the code that accompanies the reports in The Gallery by Weights & Biases." 
} methods: { @@ -46511,7 +46952,7 @@ pr_id_to_video: { url: "https://github.com/fk128/batchnorm-transferlearning" owner: "fk128" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 3 + number_of_stars: 4 description: "Partial transfusion: on the expressive influence of trainable batch norm parameters for transfer learning. TL;DR: Fine-tuning only the batch norm affine parameters leads to similar performance as to fine-tuning all of the model parameters" } } @@ -46569,7 +47010,7 @@ pr_id_to_video: { url: "https://github.com/RAIVNLab/supsup" owner: "RAIVNLab" framework: FRAMEWORK_PYTORCH - number_of_stars: 78 + number_of_stars: 79 description: "Code for \"Supermasks in Superposition\"" } } @@ -46627,6 +47068,14 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH number_of_stars: 1 } + repositories: { + is_official: true + url: "https://github.com/google/uncertainty-baselines" + owner: "google" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 514 + description: "High-quality implementations of standard and SOTA methods on a variety of tasks." + } methods: { name: "SGD" full_name: "Stochastic Gradient Descent" @@ -46636,8 +47085,8 @@ pr_id_to_video: { video: { video_id: "bI8ceHOoYxk" video_title: "PR-313: Training BatchNorm and Only BatchNorm: On the Expressive Power of Random Features in CNNs" - number_of_likes: 18 - number_of_views: 593 + number_of_likes: 20 + number_of_views: 662 published_date: { seconds: 1618762314 } @@ -46653,31 +47102,32 @@ pr_id_to_video: { paper_id: "vatt-transformers-for-multimodal-self" title: "VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio and Text" arxiv_id: "2104.11178" - abstract: "We present a framework for learning multimodal representations from unlabeled data using convolution-free Transformer architectures. Specifically, our Video-Audio-Text Transformer (VATT) takes raw signals as inputs and extracts multimodal representations that are rich enough to benefit a variety of downstream tasks. We train VATT end-to-end from scratch using multimodal contrastive losses and evaluate its performance by the downstream tasks of video action recognition, audio event classification, image classification, and text-to-video retrieval. Furthermore, we study a modality-agnostic single-backbone Transformer by sharing weights among the three modalities. We show that the convolution-free VATT outperforms state-of-the-art ConvNet-based architectures in the downstream tasks. Especially, VATT's vision Transformer achieves the top-1 accuracy of 82.1% on Kinetics-400, 83.6% on Kinetics-600,and 41.1% on Moments in Time, new records while avoiding supervised pre-training. Transferring to image classification leads to 78.7% top-1 accuracy on ImageNet compared to 64.7% by training the same Transformer from scratch, showing the generalizability of our model despite the domain gap between videos and images. VATT's audio Transformer also sets a new record on waveform-based audio event recognition by achieving the mAP of 39.4% on AudioSet without any supervised pre-training." + abstract: "We present a framework for learning multimodal representations from unlabeled data using convolution-free Transformer architectures. Specifically, our Video-Audio-Text Transformer (VATT) takes raw signals as inputs and extracts multimodal representations that are rich enough to benefit a variety of downstream tasks. 
We train VATT end-to-end from scratch using multimodal contrastive losses and evaluate its performance by the downstream tasks of video action recognition, audio event classification, image classification, and text-to-video retrieval. Furthermore, we study a modality-agnostic single-backbone Transformer by sharing weights among the three modalities. We show that the convolution-free VATT outperforms state-of-the-art ConvNet-based architectures in the downstream tasks. Especially, VATT's vision Transformer achieves the top-1 accuracy of 82.1% on Kinetics-400, 83.6% on Kinetics-600, and 41.1% on Moments in Time, new records while avoiding supervised pre-training. Transferring to image classification leads to 78.7% top-1 accuracy on ImageNet compared to 64.7% by training the same Transformer from scratch, showing the generalizability of our model despite the domain gap between videos and images. VATT's audio Transformer also sets a new record on waveform-based audio event recognition by achieving the mAP of 39.4% on AudioSet without any supervised pre-training. VATT's source code is publicly available."
    published_date: {
      seconds: 1619049600
    }
    authors: "Hassan Akbari"
-    authors: "Linagzhe Yuan"
+    authors: "Liangzhe Yuan"
    authors: "Rui Qian"
    authors: "Wei-Hong Chuang"
    authors: "Shih-Fu Chang"
    authors: "Yin Cui"
    authors: "Boqing Gong"
-    repositories: {
-      url: "https://github.com/akashe/ProgrammingInterview"
-      owner: "akashe"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 1
-    }
    repositories: {
      is_official: true
      url: "https://github.com/tensorflow/models"
      owner: "tensorflow"
      framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 70579
+      number_of_stars: 70934
      description: "Models and examples built with TensorFlow"
    }
+    repositories: {
+      url: "https://github.com/akashe/ProgrammingInterview"
+      owner: "akashe"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 8
+      description: "Code on selecting an action based on multimodal inputs. Here in this case inputs are voice and text."
+    }
    methods: {
      name: "Residual Connection"
      full_name: "Residual Connection"
      description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers."
    }
@@ -46716,7 +47166,7 @@ pr_id_to_video: {
    methods: {
      name: "Vision Transformer"
      full_name: "Vision Transformer"
-      description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image."
+      description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence."
    }
    methods: {
      name: "Adam"
      full_name: "Adam"
      description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively."
@@ -46782,14 +47232,14 @@ pr_id_to_video: {
      description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$"
    }
    methods: {
-      name: "Multi-Head Attention"
-      full_name: "Multi-Head Attention"
-      description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. 
The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)"
+      name: "Stochastic Depth"
+      full_name: "Stochastic Depth"
+      description: "**Stochastic Depth** aims to shrink the depth of a network during training, while\r\nkeeping it unchanged during testing. This is achieved by randomly dropping entire [ResBlocks](https://paperswithcode.com/method/residual-block) during training and bypassing their transformations through skip connections. \r\n\r\nLet $b\\_{l} \\in$ {$0, 1$} denote a Bernoulli random variable, which indicates whether the $l$th ResBlock is active ($b\\_{l} = 1$) or inactive ($b\\_{l} = 0$). Further, let us denote the “survival” probability of ResBlock $l$ as $p\\_{l} = \\text{Pr}\\left(b\\_{l} = 1\\right)$. With this definition we can bypass the $l$th ResBlock by multiplying its function $f\\_{l}$ with $b\\_{l}$ and we extend the update rule to:\r\n\r\n$$ H\\_{l} = \\text{ReLU}\\left(b\\_{l}f\\_{l}\\left(H\\_{l-1}\\right) + \\text{id}\\left(H\\_{l-1}\\right)\\right) $$\r\n\r\nIf $b\\_{l} = 1$, this reduces to the original ResNet update and this ResBlock remains unchanged. If $b\\_{l} = 0$, the ResBlock reduces to the identity function, $H\\_{l} = \\text{id}\\left(H\\_{l-1}\\right)$."
    }
    methods: {
-      name: "Adam"
-      full_name: "Adam"
-      description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively."
+      name: "Swin Transformer"
+      full_name: "Swin Transformer"
+      description: "The **Swin Transformer** is a type of Vision Transformer. 
It builds hierarchical feature maps by merging image patches (shown in gray) in deeper layers and has linear computation complexity to input image size due to computation of self-attention only within each local window (shown in red). It can thus serve as a general-purpose backbone for both image classification and dense recognition tasks. In contrast, previous vision Transformers produce feature maps of a single low resolution and have quadratic computation complexity to input image size due to computation of self-attention globally." } } papers: { @@ -46812,7 +47262,7 @@ pr_id_to_video: { url: "https://github.com/rowanz/merlot" owner: "rowanz" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 71 + number_of_stars: 80 description: "MERLOT: Multimodal Neural Script Knowledge Models" } } @@ -46836,7 +47286,7 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/Motionformer" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 54 + number_of_stars: 59 description: "Code + pre-trained models for the paper Keeping Your Eye on the Ball Trajectory Attention in Video Transformers" } } @@ -46859,16 +47309,24 @@ pr_id_to_video: { url: "https://github.com/ed-fish/spatio-temporal-contrastive-film" owner: "ed-fish" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 + number_of_stars: 13 description: "Pytorch implementation of Spatio Temporal Contrastive Video Representation Learning : https://arxiv.org/abs/2008.03800" } repositories: { url: "https://github.com/ed-fish/spatio-temporal-contrastive-video" owner: "ed-fish" framework: FRAMEWORK_PYTORCH - number_of_stars: 11 + number_of_stars: 13 description: "Pytorch implementation of Spatio Temporal Contrastive Video Representation Learning : https://arxiv.org/abs/2008.03800" } + repositories: { + is_official: true + url: "https://github.com/tensorflow/models" + owner: "tensorflow" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 70934 + description: "Models and examples built with TensorFlow" + } methods: { name: "ResNet" full_name: "Residual Network" @@ -46949,29 +47407,34 @@ pr_id_to_video: { url: "https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } + repositories: { + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } repositories: { is_official: true url: "https://github.com/google-research/nested-transformer" owner: "google-research" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 49 + number_of_stars: 59 description: "Aggregating Nested Transformer https://arxiv.org/pdf/2105.12723.pdf" } - repositories: { - url: "https://github.com/lucidrains/vit-pytorch" - owner: "lucidrains" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 - description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" - } methods: { name: "Scaled Dot-Product Attention" full_name: "Scaled Dot-Product Attention" description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled 
down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} q_{i}k_{i}$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$."
    }
+    methods: {
+      name: "Residual Connection"
+      full_name: "Residual Connection"
+      description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers."
+    }
    methods: {
      name: "Softmax"
      full_name: "Softmax"
@@ -46980,7 +47443,12 @@ pr_id_to_video: {
    methods: {
      name: "Vision Transformer"
      full_name: "Vision Transformer"
-      description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image."
+      description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence."
+    }
+    methods: {
+      name: "Layer Normalization"
+      full_name: "Layer Normalization"
+      description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. 
Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } methods: { name: "Multi-Head Attention" @@ -47004,19 +47472,25 @@ pr_id_to_video: { authors: "Xiangning Chen" authors: "Cho-Jui Hsieh" authors: "Boqing Gong" + repositories: { + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } repositories: { is_official: true url: "https://github.com/google-research/vision_transformer" owner: "google-research" framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + number_of_stars: 3306 } } video: { video_id: "rgXxAFIBido" video_title: "PR-314: VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio, and Text" - number_of_likes: 16 - number_of_views: 795 + number_of_likes: 19 + number_of_views: 933 published_date: { seconds: 1619969877 } @@ -47044,14 +47518,14 @@ pr_id_to_video: { url: "https://github.com/CompVis/taming-transformers" owner: "CompVis" framework: FRAMEWORK_PYTORCH - number_of_stars: 1610 + number_of_stars: 1754 description: "Taming Transformers for High-Resolution Image Synthesis" } repositories: { url: "https://github.com/tgisaturday/taming-transformers-tpu" owner: "tgisaturday" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 31 description: "Refactoring dalle-pytorch and taming-transformers for TPU VM" } methods: { @@ -47120,8 +47594,8 @@ pr_id_to_video: { url: "https://github.com/universome/alis" owner: "universome" framework: FRAMEWORK_PYTORCH - number_of_stars: 134 - description: "Aligning Latent and Image Spaces to Connect the Unconnectable" + number_of_stars: 137 + description: "Aligning Latent and Image Spaces to Connect the Unconnectable [ICCV 2021]" } methods: { name: "Adaptive Instance Normalization" @@ -47213,19 +47687,12 @@ pr_id_to_video: { authors: "Kai Zhong" authors: "Yiming Yang" authors: "Inderjit Dhillon" - repositories: { - url: "https://github.com/amzn/pecos" - owner: "amzn" - framework: FRAMEWORK_OTHERS - number_of_stars: 97 - description: "PECOS - Prediction for Enormous and Correlated Spaces" - } repositories: { is_official: true url: "https://github.com/OctoberChang/X-Transformer" owner: "OctoberChang" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 91 + number_of_stars: 95 description: "X-Transformer: Taming Pretrained Transformers for eXtreme Multi-label Text Classification" } methods: { @@ -47292,18 +47759,25 @@ pr_id_to_video: { authors: "Ting Chen" authors: "Dimitris N. 
Metaxas" authors: "Han Zhang" + repositories: { + url: "https://github.com/tgisaturday/taming-transformers-tpu" + owner: "tgisaturday" + framework: FRAMEWORK_PYTORCH + number_of_stars: 31 + description: "Refactoring dalle-pytorch and taming-transformers for TPU VM" + } repositories: { url: "https://github.com/tgisaturday/dalle-lightning-tpu" owner: "tgisaturday" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 31 description: "Refactoring dalle-pytorch and taming-transformers for TPU VM" } repositories: { - url: "https://github.com/tgisaturday/taming-transformers-tpu" + url: "https://github.com/tgisaturday/dalle-lightning" owner: "tgisaturday" framework: FRAMEWORK_PYTORCH - number_of_stars: 10 + number_of_stars: 31 description: "Refactoring dalle-pytorch and taming-transformers for TPU VM" } methods: { @@ -47479,7 +47953,7 @@ pr_id_to_video: { url: "https://github.com/THUDM/CogView" owner: "THUDM" framework: FRAMEWORK_PYTORCH - number_of_stars: 402 + number_of_stars: 467 description: "Text-to-Image generation" } methods: { @@ -47550,8 +48024,8 @@ pr_id_to_video: { video: { video_id: "GcbT0IGt0xE" video_title: "PR-315: Taming Transformers for High-Resolution Image Synthesis" - number_of_likes: 10 - number_of_views: 535 + number_of_likes: 14 + number_of_views: 684 published_date: { seconds: 1620110376 } @@ -47583,20 +48057,6 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Mario Lucic" authors: "Alexey Dosovitskiy" - repositories: { - url: "https://github.com/SauravMaheshkar/MLP-Mixer" - owner: "SauravMaheshkar" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Minimal Flax implementation of MLP-Mixer from \"MLP-Mixer: An all-MLP Architecture for Vision\" (https://arxiv.org/abs/2105.01601)" - } - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } repositories: { url: "https://github.com/sekilab/image_processing_bootcamp2021" owner: "sekilab" @@ -47604,25 +48064,28 @@ pr_id_to_video: { number_of_stars: 2 } repositories: { - is_official: true - url: "https://github.com/google-research/vision_transformer" - owner: "google-research" - framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + url: "https://github.com/Benjamin-Etheredge/mlp-mixer-keras" + owner: "Benjamin-Etheredge" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 } repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/mlp_mixer" - owner: "transformers" + url: "https://github.com/imad08/MLP-Mixer" + owner: "imad08" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." 
    }
    repositories: {
-      url: "https://github.com/bangoc123/mlp-mixer"
-      owner: "bangoc123"
+      url: "https://github.com/ttt496/VisionTransformer"
+      owner: "ttt496"
+      framework: FRAMEWORK_OTHERS
+      number_of_stars: 1
+    }
+    repositories: {
+      url: "https://github.com/leondgarse/Keras_mlp"
+      owner: "leondgarse"
       framework: FRAMEWORK_TENSORFLOW
-      number_of_stars: 56
-      description: "Implementation for paper MLP-Mixer: An all-MLP Architecture for Vision"
+      number_of_stars: 4
+      description: "Keras implementation of mlp-mixer, ResMLP. imagenet/imagenet21k weights reloaded."
    }
    repositories: {
      url: "https://github.com/lavish619/MLP-Mixer-PyTorch"
@@ -47637,29 +48100,36 @@ pr_id_to_video: {
      framework: FRAMEWORK_PYTORCH
      description: "Pytorch implementation of MLP Mixer"
    }
-    repositories: {
-      url: "https://github.com/04RR/SOTA-Vision"
-      owner: "04RR"
-      framework: FRAMEWORK_PYTORCH
-      number_of_stars: 6
-      description: "Implementation of various state of the art architectures used in computer vision. "
-    }
    repositories: {
      url: "https://github.com/sayakpaul/MLP-Mixer-CIFAR10"
      owner: "sayakpaul"
      framework: FRAMEWORK_OTHERS
-      number_of_stars: 32
+      number_of_stars: 34
      description: "Implements MLP-Mixer (https://arxiv.org/abs/2105.01601) with the CIFAR-10 dataset. "
    }
+    repositories: {
+      url: "https://github.com/rishikksh20/MLP-Mixer-pytorch"
+      owner: "rishikksh20"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 137
+      description: "Unofficial implementation of MLP-Mixer: An all-MLP Architecture for Vision"
+    }
+    repositories: {
+      url: "https://github.com/isaaccorley/mlp-mixer-pytorch"
+      owner: "isaaccorley"
+      framework: FRAMEWORK_PYTORCH
+      number_of_stars: 21
+      description: "PyTorch implementation of \"MLP-Mixer: An all-MLP Architecture for Vision\" Tolstikhin et al. (2021)"
+    }
    methods: {
-      name: "Residual Connection"
-      full_name: "Residual Connection"
-      description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers."
+      name: "GELU"
+      full_name: "Gaussian Error Linear Units"
+      description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ is the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). 
Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." } methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { name: "Dense Connections" @@ -47671,35 +48141,35 @@ pr_id_to_video: { full_name: "Label Smoothing" description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. 
Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } methods: { name: "Transformer" full_name: "Transformer" description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." } methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. 
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$"
+      name: "Multi-Head Attention"
+      full_name: "Multi-Head Attention"
+      description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "MLP-Mixer" + full_name: "MLP-Mixer" + description: "The **MLP-Mixer** architecture (or “Mixer” for short) is an image architecture that doesn't use convolutions or self-attention. 
Instead, Mixer’s architecture is based entirely on multi-layer perceptrons (MLPs) that are repeatedly applied across either spatial locations or feature channels. Mixer relies only on basic matrix multiplication routines, changes to data layout (reshapes and transpositions), and scalar nonlinearities.\r\n\r\nIt accepts a sequence of linearly projected image patches (also referred to as tokens) shaped as a “patches × channels” table as an input, and maintains this dimensionality. Mixer makes use of two types of MLP layers: channel-mixing MLPs and token-mixing MLPs. The channel-mixing MLPs allow communication between different channels; they operate on each token independently and take individual rows of the table as inputs. The token-mixing MLPs allow communication between different spatial locations (tokens); they operate on each channel independently and take individual columns of the table as inputs. These two types of layers are interleaved to enable interaction of both input dimensions."
    }
  }
  papers: {
@@ -47716,14 +48186,14 @@ pr_id_to_video: {
    authors: "Mingming Sun"
    authors: "Ping Li"
    methods: {
-      name: "Residual Connection"
-      full_name: "Residual Connection"
-      description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers."
+      name: "GELU"
+      full_name: "Gaussian Error Linear Units"
+      description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ is the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers."
    }
    methods: {
-      name: "Layer Normalization"
-      full_name: "Layer Normalization"
-      description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. 
It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { name: "Dense Connections" @@ -47735,35 +48205,35 @@ pr_id_to_video: { full_name: "Label Smoothing" description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. 
Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } methods: { name: "Transformer" full_name: "Transformer" description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." } methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. 
We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)"
    }
    methods: {
-      name: "Convolution"
-      full_name: "Convolution"
-      description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)"
+      name: "Multi-Head Attention"
+      full_name: "Multi-Head Attention"
+      description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)"
    }
    methods: {
-      name: "Depthwise Convolution"
-      full_name: "Depthwise Convolution"
-      description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)"
+      name: "Residual Connection"
+      full_name: "Residual Connection"
+      description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. 
To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } } papers: { @@ -47776,6 +48246,46 @@ pr_id_to_video: { } authors: "George Cazenavette" authors: "Manuel Ladron De Guevara" + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. 
To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers."
+    }
+    methods: {
+      name: "GELU"
+      full_name: "Gaussian Error Linear Units"
+      description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ is the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers."
+    }
+    methods: {
+      name: "Layer Normalization"
+      full_name: "Layer Normalization"
+      description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1."
+    }
+    methods: {
+      name: "Average Pooling"
+      full_name: "Average Pooling"
+      description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. 
It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "MLP-Mixer" + full_name: "MLP-Mixer" + description: "The **MLP-Mixer** architecture (or “Mixer” for short) is an image architecture that doesn't use convolutions or self-attention. Instead, Mixer’s architecture is based entirely on multi-layer perceptrons (MLPs) that are repeatedly applied across either spatial locations or feature channels. Mixer relies only on basic matrix multiplication routines, changes to data layout (reshapes and transpositions), and scalar nonlinearities.\r\n\r\nIt accepts a sequence of linearly projected image patches (also referred to as tokens) shaped as a “patches × channels” table as an input, and maintains this dimensionality. Mixer makes use of two types of MLP layers: channel-mixing MLPs and token-mixing MLPs. The channel-mixing MLPs allow communication between different channels; they operate on each token independently and take individual rows of the table as inputs. The token-mixing MLPs allow communication between different spatial locations (tokens); they operate on each channel independently and take individual columns of the table as inputs. These two types of layers are interleaved to enable interaction of both input dimensions." + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. 
$w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } } papers: { paper_id: "pay-attention-to-mlps" @@ -47793,70 +48303,70 @@ pr_id_to_video: { url: "https://github.com/xmu-xiaoma666/External-Attention-pytorch" owner: "xmu-xiaoma666" framework: FRAMEWORK_PYTORCH - number_of_stars: 840 + number_of_stars: 1492 description: "🍀 Pytorch implementation of various Attention Mechanisms, MLP, Re-parameter, Convolution, which is helpful to further understand papers.⭐⭐⭐" } repositories: { - url: "https://github.com/SauravMaheshkar/gMLP" - owner: "SauravMaheshkar" - framework: FRAMEWORK_OTHERS - description: "Flax implementation of gMLP from \"Pay Attention to MLPs\" (https://arxiv.org/abs/2105.08050)" + url: "https://github.com/lucidrains/g-mlp-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 289 + description: "Implementation of gMLP, an all-MLP replacement for Transformers, in Pytorch" } repositories: { - url: "https://github.com/NydiaAI/g-mlp-tensorflow" - owner: "NydiaAI" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 6 - description: "A gMLP (gated MLP) implementation in Tensorflow 1.x, as described in the paper \"Pay Attention to MLPs\" (2105.08050)." + url: "https://github.com/jaketae/g-mlp" + owner: "jaketae" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "PyTorch implementation of Pay Attention to MLPs" } repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/gmlp" - owner: "transformers" + url: "https://github.com/antonyvigouret/Pay-Attention-to-MLPs" + owner: "antonyvigouret" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." + number_of_stars: 10 + description: "My implementation of the gMLP model from the paper \"Pay Attention to MLPs\"." 
} repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + url: "https://github.com/SauravMaheshkar/gMLP" + owner: "SauravMaheshkar" + framework: FRAMEWORK_OTHERS + description: "Flax implementation of gMLP from \"Pay Attention to MLPs\" (https://arxiv.org/abs/2105.08050)" } repositories: { - url: "https://github.com/leaderj1001/Bag-of-MLP" - owner: "leaderj1001" + url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/multimodal_models/gmlp" + owner: "multimodal_models" framework: FRAMEWORK_PYTORCH - number_of_stars: 15 - description: "Bag of MLP" + number_of_stars: 3 + description: "ai codebase" } repositories: { - url: "https://github.com/lucidrains/mlp-gpt-jax" + url: "https://github.com/lucidrains/g-mlp-gpt" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 44 - description: "A GPT, made only of MLPs, in Jax" + number_of_stars: 68 + description: "GPT, but made only out of MLPs" } repositories: { url: "https://github.com/nlpodyssey/spago/blob/main/pkg/ml/nn/gmlp/model.go" owner: "gmlp" framework: FRAMEWORK_OTHERS - number_of_stars: 942 + number_of_stars: 948 description: "Self-contained Machine Learning and Natural Language Processing library in Go" } repositories: { - url: "https://github.com/lucidrains/g-mlp-gpt" - owner: "lucidrains" + url: "https://github.com/rwightman/pytorch-image-models" + owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 68 - description: "GPT, but made only out of MLPs" + number_of_stars: 12196 + description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/multimodal_models/gmlp" - owner: "multimodal_models" + url: "https://github.com/leaderj1001/Bag-of-MLP" + owner: "leaderj1001" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "ai codebase" + number_of_stars: 16 + description: "Bag of MLP" } methods: { name: "Scaled Dot-Product Attention" @@ -47920,12 +48430,18 @@ pr_id_to_video: { authors: "Xiangning Chen" authors: "Cho-Jui Hsieh" authors: "Boqing Gong" + repositories: { + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } repositories: { is_official: true url: "https://github.com/google-research/vision_transformer" owner: "google-research" framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + number_of_stars: 3306 } } papers: { @@ -47944,7 +48460,7 @@ pr_id_to_video: { url: "https://github.com/neosapience/mlp-singer" owner: "neosapience" framework: FRAMEWORK_PYTORCH - number_of_stars: 25 + number_of_stars: 32 description: "Official implementation of MLP Singer: Towards Rapid Parallel Korean Singing Voice Synthesis" } } @@ -47977,8 +48493,8 @@ pr_id_to_video: { video: { video_id: "KQmZlxdnnuY" video_title: "PR-317: MLP-Mixer: An all-MLP Architecture for Vision" - number_of_likes: 97 - number_of_views: 3194 + number_of_likes: 108 + number_of_views: 3589 published_date: { seconds: 1620574418 } @@ -48006,34 +48522,48 @@ pr_id_to_video: { authors: "Piotr Bojanowski" authors: "Armand Joulin" repositories: { - url: 
"https://github.com/beresandras/contrastive-classification-keras" - owner: "beresandras" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 7 - description: "Implementation of self-supervised image-level contrastive pretraining methods using Keras." + url: "https://github.com/vturrisi/solo-learn" + owner: "vturrisi" + framework: FRAMEWORK_PYTORCH + number_of_stars: 289 + description: "solo-learn: a library of self-supervised methods for visual representation learning powered by Pytorch Lightning" + } + repositories: { + url: "https://github.com/facebookresearch/vissl" + owner: "facebookresearch" + framework: FRAMEWORK_PYTORCH + number_of_stars: 1878 + description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." } repositories: { url: "https://github.com/lucidrains/vit-pytorch" owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 + number_of_stars: 5337 description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" } repositories: { - url: "https://github.com/facebookresearch/vissl" - owner: "facebookresearch" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1788 - description: "VISSL is FAIR's library of extensible, modular and scalable components for SOTA Self-Supervised Learning with images." + url: "https://github.com/beresandras/contrastive-classification-keras" + owner: "beresandras" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 9 + description: "Implementation of self-supervised image-level contrastive pretraining methods using Keras." } repositories: { is_official: true url: "https://github.com/facebookresearch/dino" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 2548 + number_of_stars: 2715 description: "PyTorch code for Vision Transformers training with the Self-Supervised learning method DINO" } + repositories: { + url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/self_supervised_learning_methods/self_distillation_with_no_labels" + owner: "self_supervised_learning_methods" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "ai codebase" + } methods: { name: "Residual Connection" full_name: "Residual Connection" @@ -48077,7 +48607,7 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." 
} methods: { name: "Adam" @@ -48096,21 +48626,21 @@ pr_id_to_video: { authors: "Sara Atito" authors: "Muhammad Awais" authors: "Josef Kittler" - repositories: { - url: "https://github.com/rishikksh20/SiT-pytorch" - owner: "rishikksh20" - framework: FRAMEWORK_PYTORCH - number_of_stars: 7 - description: "SiT: Self-supervised vision Transformer" - } repositories: { is_official: true url: "https://github.com/Sara-Ahmed/SiT" owner: "Sara-Ahmed" framework: FRAMEWORK_PYTORCH - number_of_stars: 145 + number_of_stars: 150 description: "Self-supervised vIsion Transformer (SiT)" } + repositories: { + url: "https://github.com/rishikksh20/SiT-pytorch" + owner: "rishikksh20" + framework: FRAMEWORK_PYTORCH + number_of_stars: 7 + description: "SiT: Self-supervised vision Transformer" + } methods: { name: "AutoEncoder" full_name: "AutoEncoder" @@ -48132,7 +48662,7 @@ pr_id_to_video: { url: "https://github.com/CupidJay/MoCov3-pytorch" owner: "CupidJay" framework: FRAMEWORK_PYTORCH - number_of_stars: 27 + number_of_stars: 32 description: "custom pytorch implementation of MoCo v3" } methods: { @@ -48170,7 +48700,7 @@ pr_id_to_video: { url: "https://github.com/microsoft/Swin-Transformer" owner: "microsoft" framework: FRAMEWORK_PYTORCH - number_of_stars: 3861 + number_of_stars: 4163 description: "This is an official implementation for \"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows\"." } repositories: { @@ -48178,58 +48708,58 @@ pr_id_to_video: { url: "https://github.com/SwinTransformer/Transformer-SSL" owner: "SwinTransformer" framework: FRAMEWORK_PYTORCH - number_of_stars: 280 + number_of_stars: 312 description: "This is an official implementation for \"Self-Supervised Learning with Swin Transformers\"." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. 
More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." } methods: { - name: "BPE" - full_name: "Byte Pair Encoding" - description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." + name: "Random Gaussian Blur" + full_name: "Random Gaussian Blur" + description: "**Random Gaussian Blur** is an image data augmentation technique where we randomly blur the image using a Gaussian distribution.\r\n\r\nImage Source: [Wikipedia](https://en.wikipedia.org/wiki/Gaussian_blur)" } methods: { - name: "Dropout" - full_name: "Dropout" - description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} q_ik_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." } methods: { - name: "Batch Normalization" - full_name: "Batch Normalization" - description: "**Batch Normalization** aims to reduce internal covariate shift, and in doing so aims to accelerate the training of deep neural nets. It accomplishes this via a normalization step that fixes the means and variances of layer inputs. 
Batch Normalization also has a beneficial effect on the gradient flow through the network, by reducing the dependence of gradients on the scale of the parameters or of their initial values. This allows for use of much higher learning rates without the risk of divergence. Furthermore, batch normalization regularizes the model and reduces the need for Dropout.\r\n\r\nWe apply a batch normalization layer as follows for a minibatch $\\mathcal{B}$:\r\n\r\n$$ \\mu\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}x\\_{i} $$\r\n\r\n$$ \\sigma^{2}\\_{\\mathcal{B}} = \\frac{1}{m}\\sum^{m}\\_{i=1}\\left(x\\_{i}-\\mu\\_{\\mathcal{B}}\\right)^{2} $$\r\n\r\n$$ \\hat{x}\\_{i} = \\frac{x\\_{i} - \\mu\\_{\\mathcal{B}}}{\\sqrt{\\sigma^{2}\\_{\\mathcal{B}}+\\epsilon}} $$\r\n\r\n$$ y\\_{i} = \\gamma\\hat{x}\\_{i} + \\beta = \\text{BN}\\_{\\gamma, \\beta}\\left(x\\_{i}\\right) $$\r\n\r\nWhere $\\gamma$ and $\\beta$ are learnable parameters." + name: "DeiT" + full_name: "Data-efficient Image Transformer" + description: "A **Data-Efficient Image Transformer** is a type of Vision Transformer for image classification tasks. The model is trained using a teacher-student strategy specific to transformers. It relies on a distillation token ensuring that the student learns from the teacher through attention." } methods: { - name: "Attention Dropout" - full_name: "Attention Dropout" - description: "**Attention Dropout** is a type of dropout used in attention-based architectures, where elements are randomly dropped out of the softmax in the attention equation. For example, for scaled-dot product attention, we would drop elements from the first term:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}\\left(\\frac{QK^{T}}{\\sqrt{d_k}}\\right)V $$" + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$" } methods: { - name: "MoCo" - full_name: "Momentum Contrast" - description: "**MoCo**, or **Momentum Contrast**, is a self-supervised learning algorithm with a contrastive loss. \r\n\r\nContrastive loss methods can be thought of as building dynamic dictionaries. The \"keys\" (tokens) in the dictionary are sampled from data (e.g., images or patches) and are represented by an encoder network. Unsupervised learning trains encoders to perform dictionary look-up: an encoded “query” should be similar to its matching key and dissimilar to others. Learning is formulated as minimizing a contrastive loss. \r\n\r\nMoCo can be viewed as a way to build large and consistent dictionaries for unsupervised learning with a contrastive loss. In MoCo, we maintain the dictionary as a queue of data samples: the encoded representations of the current mini-batch are enqueued, and the oldest are dequeued. The queue decouples the dictionary size from the mini-batch size, allowing it to be large. Moreover, as the dictionary keys come from the preceding several mini-batches, a slowly progressing key encoder, implemented as a momentum-based moving average of the query encoder, is proposed to maintain consistency." + name: "Stochastic Depth" + full_name: "Stochastic Depth" + description: "**Stochastic Depth** aims to shrink the depth of a network during training, while\r\nkeeping it unchanged during testing. 
This is achieved by randomly dropping entire [ResBlocks](https://paperswithcode.com/method/residual-block) during training and bypassing their transformations through skip connections. \r\n\r\nLet $b\\_{l} \\in$ {$0, 1$} denote a Bernoulli random variable, which indicates whether the $l$th ResBlock is active ($b\\_{l} = 1$) or inactive ($b\\_{l} = 0$). Further, let us denote the “survival” probability of ResBlock $l$ as $p\\_{l} = \\text{Pr}\\left(b\\_{l} = 1\\right)$. With this definition we can bypass the $l$th ResBlock by multiplying its function $f\\_{l}$ with $b\\_{l}$ and we extend the update rule to:\r\n\r\n$$ H\\_{l} = \\text{ReLU}\\left(b\\_{l}f\\_{l}\\left(H\\_{l-1}\\right) + \\text{id}\\left(H\\_{l-1}\\right)\\right) $$\r\n\r\nIf $b\\_{l} = 1$, this reduces to the original ResNet update and this ResBlock remains unchanged. If $b\\_{l} = 0$, the ResBlock reduces to the identity function, $H\\_{l} = \\text{id}\\left(H\\_{l-1}\\right)$." } methods: { - name: "BYOL" - full_name: "Bootstrap Your Own Latent" - description: "BYOL (Bootstrap Your Own Latent) is a new approach to self-supervised learning. BYOL’s goal is to learn a representation $y_θ$ which can then be used for downstream tasks. BYOL uses two neural networks to learn: the online and target networks. The online network is defined by a set of weights $θ$ and is comprised of three stages: an encoder $f_θ$, a projector $g_θ$ and a predictor $q_θ$. The target network has the same architecture\r\nas the online network, but uses a different set of weights $ξ$. The target network provides the regression\r\ntargets to train the online network, and its parameters $ξ$ are an exponential moving average of the\r\nonline parameters $θ$.\r\n\r\nGiven the architecture diagram on the right, BYOL minimizes a similarity loss between $q_θ(z_θ)$ and $sg(z'{_ξ})$, where $θ$ are the trained weights, $ξ$ are an exponential moving average of $θ$ and $sg$ means stop-gradient. At the end of training, everything but $f_θ$ is discarded, and $y_θ$ is used as the image representation.\r\n\r\nSource: [Bootstrap Your Own Latent - A New Approach to Self-Supervised Learning](https://paperswithcode.com/paper/bootstrap-your-own-latent-a-new-approach-to-1)\r\n\r\nImage credit: [Bootstrap Your Own Latent - A New Approach to Self-Supervised Learning](https://paperswithcode.com/paper/bootstrap-your-own-latent-a-new-approach-to-1)" + name: "Swin Transformer" + full_name: "Swin Transformer" + description: "The **Swin Transformer** is a type of Vision Transformer. It builds hierarchical feature maps by merging image patches (shown in gray) in deeper layers and has linear computation complexity to input image size due to computation of self-attention only within each local window (shown in red). It can thus serve as a general-purpose backbone for both image classification and dense recognition tasks. In contrast, previous vision Transformers produce feature maps of a single low resolution and have quadratic computation complexity to input image size due to computation of self-attention globally." } methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + name: "Adam" + full_name: "Adam" + description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD with Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." } methods: { - name: "Label Smoothing" - full_name: "Label Smoothing" - description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + name: "DINO" + full_name: "DINO" + description: "**DINO** (self-distillation with no labels) is a self-supervised learning method that directly predicts the output of a teacher network - built with a momentum encoder - by using a standard cross-entropy loss. \r\n\r\nIn the example to the right, DINO is illustrated in the case of one single pair of views $\\left(x\\_{1}, x\\_{2}\\right)$ for simplicity. The model passes two different random transformations of an input image to the student and teacher networks. Both networks have\r\nthe same architecture but different parameters. The output of the teacher network is centered with a mean computed over the batch. Each network outputs a $K$-dimensional feature that is normalized with a temperature softmax over the feature dimension. Their\r\nsimilarity is then measured with a cross-entropy loss. A stop-gradient (sg) operator is applied on the teacher to propagate gradients\r\nonly through the student. The teacher parameters are updated with an exponential moving average (ema) of the student parameters." 
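As a companion to the DINO description just added, here is a minimal NumPy sketch of the training signal it describes: the teacher output is centered and sharpened with a temperature softmax, the student output is matched to it with a cross-entropy loss, and the teacher parameters track an exponential moving average (ema) of the student parameters. The shapes, temperatures, and momentum value below are assumptions for illustration, not the paper's configuration.

```python
import numpy as np

rng = np.random.default_rng(0)

def softmax(x, temp):
    z = (x - x.max()) / temp          # temperature softmax, numerically stable
    e = np.exp(z)
    return e / e.sum()

K = 8                                 # output dimension (assumed)
student_out = rng.normal(size=K)      # student head output for one view
teacher_out = rng.normal(size=K)      # teacher head output for the other view
center = np.zeros(K)                  # running center computed over the batch

p_teacher = softmax(teacher_out - center, temp=0.04)  # stop-gradient side
p_student = softmax(student_out, temp=0.1)
loss = -(p_teacher * np.log(p_student)).sum()         # cross-entropy

# ema update: teacher parameters follow the student parameters.
student_params = rng.normal(size=100)
teacher_params = rng.normal(size=100)
m = 0.996                             # momentum (assumed)
teacher_params = m * teacher_params + (1 - m) * student_params
print(round(float(loss), 4))
```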
} methods: { - name: "Transformer" - full_name: "Transformer" - description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." + name: "BPE" + full_name: "Byte Pair Encoding" + description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." } } papers: { @@ -48250,7 +48780,7 @@ pr_id_to_video: { url: "https://github.com/Muzammal-Naseer/Intriguing-Properties-of-Vision-Transformers" owner: "Muzammal-Naseer" framework: FRAMEWORK_PYTORCH - number_of_stars: 79 + number_of_stars: 84 description: "Official repository for \"Intriguing Properties of Vision Transformers\" (2021)" } } @@ -48270,6 +48800,14 @@ pr_id_to_video: { authors: "Xiyang Dai" authors: "Lu Yuan" authors: "Jianfeng Gao" + repositories: { + is_official: true + url: "https://github.com/microsoft/esvit" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 118 + description: "EsViT: Efficient self-supervised Vision Transformers" + } } papers: { paper_id: "beit-bert-pre-training-of-image-transformers" @@ -48282,6 +48820,21 @@ pr_id_to_video: { authors: "Hangbo Bao" authors: "Li Dong" authors: "Furu Wei" + repositories: { + url: "https://github.com/huggingface/transformers" + owner: "huggingface" + framework: FRAMEWORK_PYTORCH + number_of_stars: 49984 + description: "🤗 Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." + } + repositories: { + is_official: true + url: "https://github.com/microsoft/unilm/tree/master/beit" + owner: "master" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2565 + description: "UniLM AI - Unified \"Language\" Model Pre-training across Tasks, Languages, and Modalities" + } methods: { name: "Attention Dropout" full_name: "Attention Dropout" @@ -48386,21 +48939,21 @@ pr_id_to_video: { description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. 
The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Stochastic Depth" + full_name: "Stochastic Depth" + description: "**Stochastic Depth** aims to shrink the depth of a network during training, while\r\nkeeping it unchanged during testing. This is achieved by randomly dropping entire [ResBlocks](https://paperswithcode.com/method/residual-block) during training and bypassing their transformations through skip connections. \r\n\r\nLet $b\\_{l} \\in$ {$0, 1$} denote a Bernoulli random variable, which indicates whether the $l$th ResBlock is active ($b\\_{l} = 1$) or inactive ($b\\_{l} = 0$). Further, let us denote the “survival” probability of ResBlock $l$ as $p\\_{l} = \\text{Pr}\\left(b\\_{l} = 1\\right)$. With this definition we can bypass the $l$th ResBlock by multiplying its function $f\\_{l}$ with $b\\_{l}$ and we extend the update rule to:\r\n\r\n$$ H\\_{l} = \\text{ReLU}\\left(b\\_{l}f\\_{l}\\left(H\\_{l-1}\\right) + \\text{id}\\left(H\\_{l-1}\\right)\\right) $$\r\n\r\nIf $b\\_{l} = 1$, this reduces to the original ResNet update and this ResBlock remains unchanged. If $b\\_{l} = 0$, the ResBlock reduces to the identity function, $H\\_{l} = \\text{id}\\left(H\\_{l-1}\\right)$." } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "Swin Transformer" + full_name: "Swin Transformer" + description: "The **Swin Transformer** is a type of Vision Transformer. 
It builds hierarchical feature maps by merging image patches (shown in gray) in deeper layers and has linear computation complexity to input image size due to computation of self-attention only within each local window (shown in red). It can thus serve as a general-purpose backbone for both image classification and dense recognition tasks. In contrast, previous vision Transformers produce feature maps of a single low resolution and have quadratic computation complexity to input image size due to computation of self-attention globally." } } video: { video_id: "iOLbznp18h8" video_title: "PR-318: Emerging Properties in Self-Supervised Vision Transformers" - number_of_likes: 12 - number_of_views: 759 + number_of_likes: 17 + number_of_views: 1003 published_date: { seconds: 1621176607 } @@ -48569,7 +49122,7 @@ pr_id_to_video: { url: "https://github.com/bcol23/HyperIM" owner: "bcol23" framework: FRAMEWORK_PYTORCH - number_of_stars: 33 + number_of_stars: 34 description: "PyTorch implementation of the paper \"Hyperbolic Interaction Model For Hierarchical Multi-Label Classification\"" } } @@ -48577,7 +49130,7 @@ pr_id_to_video: { video_id: "H3qz24z3VSo" video_title: "PR-319: Interactive Multi-Label CNN Learning with Partial Labels" number_of_likes: 2 - number_of_views: 138 + number_of_views: 183 published_date: { seconds: 1622386577 } @@ -48664,13 +49217,6 @@ pr_id_to_video: { authors: "Yoshua Bengio" authors: "Aaron Courville" authors: "Pascal Vincent" - repositories: { - url: "https://github.com/gitlimlab/Representation-Learning-by-Learning-to-Count" - owner: "gitlimlab" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 106 - description: "A Tensorflow implementation of Representation Learning by Learning to Count" - } repositories: { url: "https://github.com/Kismuz/crypto_spread_test" owner: "Kismuz" @@ -48685,6 +49231,13 @@ pr_id_to_video: { number_of_stars: 5 description: "Download files as in wget :file_folder:" } + repositories: { + url: "https://github.com/gitlimlab/Representation-Learning-by-Learning-to-Count" + owner: "gitlimlab" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 106 + description: "A Tensorflow implementation of Representation Learning by Learning to Count" + } } papers: { paper_id: "adversarial-transfer-learning" @@ -48701,7 +49254,7 @@ pr_id_to_video: { url: "https://github.com/zhaoxin94/awsome-domain-adaptation" owner: "zhaoxin94" framework: FRAMEWORK_PYTORCH - number_of_stars: 2982 + number_of_stars: 3072 description: "A collection of AWESOME things about domian adaptation" } } @@ -48735,15 +49288,15 @@ pr_id_to_video: { url: "https://github.com/taohan10200/NLT" owner: "taohan10200" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 6 description: "PyTorch implementations of the paper: \"Neuron Linear Transformation: Modeling the Domain Shift for Crowd Counting. 
(T-NNLS, 2021)...\" " } } video: { video_id: "yaRytHhZatQ" video_title: "PR-320: Domain Invariant Representation Learning with Domain Density Transformations" - number_of_likes: 5 - number_of_views: 152 + number_of_likes: 8 + number_of_views: 208 published_date: { seconds: 1621787874 } @@ -48841,20 +49394,6 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Mario Lucic" authors: "Alexey Dosovitskiy" - repositories: { - url: "https://github.com/SauravMaheshkar/MLP-Mixer" - owner: "SauravMaheshkar" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Minimal Flax implementation of MLP-Mixer from \"MLP-Mixer: An all-MLP Architecture for Vision\" (https://arxiv.org/abs/2105.01601)" - } - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } repositories: { url: "https://github.com/sekilab/image_processing_bootcamp2021" owner: "sekilab" @@ -48862,25 +49401,28 @@ pr_id_to_video: { number_of_stars: 2 } repositories: { - is_official: true - url: "https://github.com/google-research/vision_transformer" - owner: "google-research" - framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + url: "https://github.com/Benjamin-Etheredge/mlp-mixer-keras" + owner: "Benjamin-Etheredge" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 } repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/mlp_mixer" - owner: "transformers" + url: "https://github.com/imad08/MLP-Mixer" + owner: "imad08" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." } repositories: { - url: "https://github.com/bangoc123/mlp-mixer" - owner: "bangoc123" + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/leondgarse/Keras_mlp" + owner: "leondgarse" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 56 - description: "Implementation for paper MLP-Mixer: An all-MLP Architecture for Vision" + number_of_stars: 4 + description: "Keras implementation of mlp-mixer, ResMLP. imagenet/imagenet21k weights reloaded." } repositories: { url: "https://github.com/lavish619/MLP-Mixer-PyTorch" @@ -48895,29 +49437,36 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH description: "Pytorch implementation of MLP Mixer" } - repositories: { - url: "https://github.com/04RR/SOTA-Vision" - owner: "04RR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of various state of the art architectures used in computer vision. " - } repositories: { url: "https://github.com/sayakpaul/MLP-Mixer-CIFAR10" owner: "sayakpaul" framework: FRAMEWORK_OTHERS - number_of_stars: 32 + number_of_stars: 34 description: "Implements MLP-Mixer (https://arxiv.org/abs/2105.01601) with the CIFAR-10 dataset. 
" } + repositories: { + url: "https://github.com/rishikksh20/MLP-Mixer-pytorch" + owner: "rishikksh20" + framework: FRAMEWORK_PYTORCH + number_of_stars: 137 + description: "Unofficial implementation of MLP-Mixer: An all-MLP Architecture for Vision" + } + repositories: { + url: "https://github.com/isaaccorley/mlp-mixer-pytorch" + owner: "isaaccorley" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "PyTorch implementation of \"MLP-Mixer: An all-MLP Architecture for Vision\" Tolstikhin et al. (2021)" + } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." } methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. 
More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { name: "Dense Connections" @@ -48929,35 +49478,35 @@ pr_id_to_video: { full_name: "Label Smoothing" description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
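The two pooling entries just added reduce to one-liners, which a short NumPy sketch makes explicit (all shapes below are assumptions): average pooling downsamples patch-wise, while global average pooling collapses each feature map to a single value that can feed the softmax classifier directly.

```python
import numpy as np

rng = np.random.default_rng(0)
fmap = rng.normal(size=(8, 8, 64))          # H x W x C feature map (assumed)

# Average Pooling: mean over each 2x2 patch with stride 2 -> (4, 4, 64).
avg = fmap.reshape(4, 2, 4, 2, 64).mean(axis=(1, 3))

# Global Average Pooling: one value per channel -> (64,), replacing the
# dense layers before the softmax classifier.
gap = fmap.mean(axis=(0, 1))
print(avg.shape, gap.shape)
```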
+ } methods: { name: "Transformer" full_name: "Transformer" description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." } methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allow for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" } methods: { - name: "Vision Transformer" - full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." 
+ name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." 
} methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "MLP-Mixer" + full_name: "MLP-Mixer" + description: "The **MLP-Mixer** architecture (or “Mixer” for short) is an image architecture that doesn't use convolutions or self-attention. Instead, Mixer’s architecture is based entirely on multi-layer perceptrons (MLPs) that are repeatedly applied across either spatial locations or feature channels. Mixer relies only on basic matrix multiplication routines, changes to data layout (reshapes and transpositions), and scalar nonlinearities.\r\n\r\nIt accepts a sequence of linearly projected image patches (also referred to as tokens) shaped as a “patches × channels” table as an input, and maintains this dimensionality. Mixer makes use of two types of MLP layers: channel-mixing MLPs and token-mixing MLPs. The channel-mixing MLPs allow communication between different channels; they operate on each token independently and take individual rows of the table as inputs. The token-mixing MLPs allow communication between different spatial locations (tokens); they operate on each channel independently and take individual columns of the table as inputs. These two types of layers are interleaved to enable interaction of both input dimensions." 
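The token-mixing/channel-mixing interleaving described in the MLP-Mixer entry above fits in a few lines. The sketch below is an illustrative single Mixer layer under assumed sizes; the tanh-based GELU approximation and the pre-norm residual layout follow the descriptions earlier in this record, and none of it is code from the paper.

```python
import numpy as np

rng = np.random.default_rng(0)

def gelu(x):  # tanh approximation of the GELU
    return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))

def layer_norm(x):  # normalize over the channel dimension
    return (x - x.mean(-1, keepdims=True)) / (x.std(-1, keepdims=True) + 1e-6)

S, C, D = 16, 64, 128                  # patches, channels, MLP width (assumed)
X = rng.normal(size=(S, C))            # the "patches x channels" table
W1 = rng.normal(size=(S, D)) * 0.02    # token-mixing MLP weights
W2 = rng.normal(size=(D, S)) * 0.02
W3 = rng.normal(size=(C, D)) * 0.02    # channel-mixing MLP weights
W4 = rng.normal(size=(D, C)) * 0.02

# Token mixing: acts on columns of the table (each channel independently).
U = X + W2.T @ gelu(W1.T @ layer_norm(X))
# Channel mixing: acts on rows of the table (each token independently).
Y = U + gelu(layer_norm(U) @ W3) @ W4
print(Y.shape)  # (16, 64)
```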
} } papers: { @@ -48976,7 +49525,7 @@ pr_id_to_video: { url: "https://github.com/dandelin/vilt" owner: "dandelin" framework: FRAMEWORK_PYTORCH - number_of_stars: 287 + number_of_stars: 342 description: "Code for the ICML 2021 (long talk) paper: \"ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision\"" } methods: { @@ -49046,74 +49595,72 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Neil Houlsby" repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" + url: "https://github.com/KatherLab/HIA" + owner: "KatherLab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + description: "Histopathology Image Analysis" } repositories: { - is_official: true - url: "https://github.com/google-research/vision_transformer" - owner: "google-research" - framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + url: "https://github.com/protonx-engineering/vit" + owner: "protonx-engineering" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 7 + description: "Our implementation for paper: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" } repositories: { - url: "https://github.com/wangguanan/light-reid" - owner: "wangguanan" + url: "https://github.com/YousefGamal220/Vision-Transformers" + owner: "YousefGamal220" framework: FRAMEWORK_PYTORCH - number_of_stars: 333 - description: "[ECCV2020] a toolbox of light-reid learning for faster inference, speed both feature extraction and retrieval stages up to >30x" + number_of_stars: 4 + description: "A PyTorch Implementation of Vision-Transformers to classify the classes of CIFAR-100 dataset, the model implemented from the paper: An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" } repositories: { - url: "https://github.com/nachiket273/VisTrans" - owner: "nachiket273" - framework: FRAMEWORK_PYTORCH - number_of_stars: 1 - description: "Implementations of transformers based models for different vision tasks" + url: "https://github.com/woctezuma/steam-CLIP" + owner: "woctezuma" + framework: FRAMEWORK_OTHERS + number_of_stars: 7 + description: "Retrieve Steam games with similar store banners, with OpenAI's CLIP." } repositories: { - url: "https://github.com/quanmario0311/ViT_PyTorch" - owner: "quanmario0311" + url: "https://github.com/gnoses/ViT_examples" + owner: "gnoses" framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "A PyTorch Implementation of ViT (Vision Transformer)" + number_of_stars: 1 } repositories: { - url: "https://github.com/purbayankar/Hyperspectral-Vision-Transformer" - owner: "purbayankar" + url: "https://github.com/nachiket273/Vision_transformer_pytorch" + owner: "nachiket273" framework: FRAMEWORK_PYTORCH number_of_stars: 6 - description: "A PyTorch implementation of CNN+Vision Transformer for hyperspectral image classification" + description: "Simple Implementation of Vision Transformer (https://openreview.net/pdf?id=YicbFdNTTy)" } repositories: { - url: "https://github.com/asarigun/TransGAN" - owner: "asarigun" + url: "https://github.com/tahmid0007/VisionTransformer" + owner: "tahmid0007" framework: FRAMEWORK_PYTORCH - number_of_stars: 4 - description: "This is re-implementation of TransGAN in PyTorch." + number_of_stars: 65 + description: "A complete easy to follow implementation of Google's Vision Transformer proposed in \"AN IMAGE IS WORTH 16X16 WORDS\". 
This pytorch implementation has comments for better understanding." } repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" + url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/vision_models/vision_transformer" + owner: "vision_models" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" + number_of_stars: 3 + description: "ai codebase" } repositories: { - url: "https://github.com/04RR/SOTA-Vision" - owner: "04RR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of various state of the art architectures used in computer vision. " + url: "https://github.com/sayannath/ViT-Image-Classification" + owner: "sayannath" + framework: FRAMEWORK_OTHERS + number_of_stars: 4 + description: "Image Classification with Vision Transformer - Keras" } repositories: { - url: "https://github.com/zer0sh0t/artificial_intelligence/tree/master/vision_models/vision_transformer" - owner: "vision_models" - framework: FRAMEWORK_PYTORCH - number_of_stars: 2 - description: "ai codebase" + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } methods: { name: "Residual Connection" @@ -49163,7 +49710,7 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." + description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." } } papers: { @@ -49178,21 +49725,21 @@ pr_id_to_video: { authors: "Ricardo Garcia" authors: "Ivan Laptev" authors: "Cordelia Schmid" - repositories: { - url: "https://github.com/isaaccorley/segmenter-pytorch" - owner: "isaaccorley" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "PyTorch implementation of \"Segmenter: Transformer for Semantic Segmentation\" Strudel et al. (2021) " - } repositories: { is_official: true url: "https://github.com/rstrudel/segmenter" owner: "rstrudel" framework: FRAMEWORK_PYTORCH - number_of_stars: 172 + number_of_stars: 194 description: "Official PyTorch implementation of Segmenter: Transformer for Semantic Segmentation" } + repositories: { + url: "https://github.com/isaaccorley/segmenter-pytorch" + owner: "isaaccorley" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "PyTorch implementation of \"Segmenter: Transformer for Semantic Segmentation\" Strudel et al. (2021) " + } methods: { name: "Residual Connection" full_name: "Residual Connection" @@ -49236,7 +49783,7 @@ pr_id_to_video: { methods: { name: "Vision Transformer" full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." 
+ description: "The **Vision Transformer**, or **ViT**, is a model for image classification that employs a [Transformer](https://www.paperswithcode.com/method/transformer)-like architecture over patches of the image. We split an image into fixed-size patches, linearly embed each of them, add position embeddings, and feed the resulting sequence of vectors to a standard Transformer encoder. In order to perform classification, we use the standard approach of adding an extra learnable “classification token” to the sequence." } methods: { name: "Multi-Head Attention" @@ -49262,7 +49809,7 @@ pr_id_to_video: { owner: "DyGRec" framework: FRAMEWORK_TENSORFLOW number_of_stars: 11 - description: "Released code of Augmenting Sequential Recommendation with Pseudo-Prior Items via Reversely Pre-training Transformer." + description: "Released code of SIGIR2021 Augmenting Sequential Recommendation with Pseudo-Prior Items via Reversely Pre-training Transformer." } } papers: { @@ -49281,53 +49828,55 @@ pr_id_to_video: { authors: "Lu Yuan" authors: "Lei Zhang" repositories: { - url: "https://github.com/lucidrains/vit-pytorch" - owner: "lucidrains" + url: "https://github.com/microsoft/esvit" + owner: "microsoft" framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 - description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + number_of_stars: 118 + description: "EsViT: Efficient self-supervised Vision Transformers" } repositories: { - is_official: true - url: "https://github.com/microsoft/CvT" - owner: "microsoft" + url: "https://github.com/leoxiaobin/CvT" + owner: "leoxiaobin" framework: FRAMEWORK_PYTORCH - number_of_stars: 93 + number_of_stars: 53 description: "This is an official implementation of CvT: Introducing Convolutions to Vision Transformers." } repositories: { url: "https://github.com/rishikksh20/convolution-vision-transformers" owner: "rishikksh20" framework: FRAMEWORK_PYTORCH - number_of_stars: 130 + number_of_stars: 142 description: "PyTorch Implementation of CvT: Introducing Convolutions to Vision Transformers" } repositories: { - url: "https://github.com/leoxiaobin/CvT" - owner: "leoxiaobin" + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" framework: FRAMEWORK_PYTORCH - number_of_stars: 39 + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } + repositories: { + is_official: true + url: "https://github.com/microsoft/CvT" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 130 description: "This is an official implementation of CvT: Introducing Convolutions to Vision Transformers." } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." 
+ } methods: { name: "Residual Connection" full_name: "Residual Connection" description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." - } - methods: { - name: "Dense Connections" - full_name: "Dense Connections" - description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" - } - methods: { - name: "Label Smoothing" - full_name: "Label Smoothing" - description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. 
Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" } methods: { name: "Transformer" @@ -49335,36 +49884,41 @@ pr_id_to_video: { description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." } methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + name: "BPE" + full_name: "Byte Pair Encoding" + description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. 
The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." } methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" } methods: { - name: "Vision Transformer" - full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" } methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. 
It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } } video: { video_id: "pMBXwIx4du8" video_title: "PR-321: Are Pre-trained Convolutions Better than Pre-trained Transformers?" - number_of_likes: 11 - number_of_views: 501 + number_of_likes: 15 + number_of_views: 639 published_date: { seconds: 1622392266 } @@ -49389,16 +49943,16 @@ pr_id_to_video: { authors: "Stéphane Mallat" repositories: { is_official: true - url: "https://github.com/j-zarka/separation_concentration_deepnets" - owner: "j-zarka" + url: "https://github.com/iclr2021-paper1937/separation_concentration_deepnets" + owner: "iclr2021-paper1937" framework: FRAMEWORK_PYTORCH number_of_stars: 2 description: "Repository for paper \"Separation and Concentration in Deep Networks\" accepted at ICLR 2021" } repositories: { is_official: true - url: "https://github.com/iclr2021-paper1937/separation_concentration_deepnets" - owner: "iclr2021-paper1937" + url: "https://github.com/j-zarka/separation_concentration_deepnets" + owner: "j-zarka" framework: FRAMEWORK_PYTORCH number_of_stars: 2 description: "Repository for paper \"Separation and Concentration in Deep Networks\" accepted at ICLR 2021" @@ -49501,7 +50055,7 @@ pr_id_to_video: { paper_id: "the-connection-between-approximation-depth" title: "The Connection Between Approximation, Depth Separation and Learnability in Neural Networks" arxiv_id: "2102.00434" - abstract: "Several recent works have shown separation results between deep neural networks, and hypothesis classes with inferior approximation capacity such as shallow networks or kernel classes. On the other hand, the fact that deep networks can efficiently express a target function does not mean this target function can be learned efficiently by deep neural networks. In this work we study the intricate connection between learnability and approximation capacity. We show that learnability with deep networks of a target function depends on the ability of simpler classes to approximate the target. Specifically, we show that a necessary condition for a function to be learnable by gradient descent on deep neural networks is to be able to approximate the function, at least in a weak sense, with shallow neural networks. We also show that a class of functions can be learned by an efficient statistical query algorithm if and only if it can be approximated in a weak sense by some kernel class. 
We give several examples of functions which demonstrate depth separation, and conclude that they cannot be efficiently learned, even by a hypothesis class that can efficiently approximate them." + abstract: "Several recent works have shown separation results between deep neural networks, and hypothesis classes with inferior approximation capacity such as shallow networks or kernel classes. On the other hand, the fact that deep networks can efficiently express a target function does not mean that this target function can be learned efficiently by deep neural networks. In this work we study the intricate connection between learnability and approximation capacity. We show that learnability with deep networks of a target function depends on the ability of simpler classes to approximate the target. Specifically, we show that a necessary condition for a function to be learnable by gradient descent on deep neural networks is to be able to approximate the function, at least in a weak sense, with shallow neural networks. We also show that a class of functions can be learned by an efficient statistical query algorithm if and only if it can be approximated in a weak sense by some kernel class. We give several examples of functions which demonstrate depth separation, and conclude that they cannot be efficiently learned, even by a hypothesis class that can efficiently approximate them." published_date: { seconds: 1612051200 } @@ -49586,8 +50140,8 @@ pr_id_to_video: { video: { video_id: "FnI2F-cN8Ts" video_title: "PR-323: Separation and Concentration in Deep Networks" - number_of_likes: 14 - number_of_views: 405 + number_of_likes: 16 + number_of_views: 528 published_date: { seconds: 1622992183 } @@ -49629,7 +50183,7 @@ pr_id_to_video: { url: "https://github.com/dandelin/vilt" owner: "dandelin" framework: FRAMEWORK_PYTORCH - number_of_stars: 287 + number_of_stars: 342 description: "Code for the ICML 2021 (long talk) paper: \"ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision\"" } methods: { @@ -49694,7 +50248,7 @@ pr_id_to_video: { url: "https://github.com/YIKUAN8/Transformers-VQA" owner: "YIKUAN8" framework: FRAMEWORK_PYTORCH - number_of_stars: 81 + number_of_stars: 88 description: "An implementation that downstreams pre-trained V+L models to VQA tasks. Now support: VisualBERT, LXMERT, and UNITER" } } @@ -49789,7 +50343,7 @@ pr_id_to_video: { url: "https://github.com/zhegan27/VILLA" owner: "zhegan27" framework: FRAMEWORK_PYTORCH - number_of_stars: 78 + number_of_stars: 82 description: "Research Code for NeurIPS 2020 Spotlight paper \"Large-Scale Adversarial Training for Vision-and-Language Representation Learning\": UNITER adversarial training part" } } @@ -49810,7 +50364,7 @@ pr_id_to_video: { url: "https://github.com/webYFDT/hateful" owner: "webYFDT" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 6 } } papers: { @@ -49834,7 +50388,7 @@ pr_id_to_video: { url: "https://github.com/mczhuge/Kaleido-BERT" owner: "mczhuge" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 62 + number_of_stars: 68 description: "(CVPR2021) Kaleido-BERT: Vision-Language Pre-training on Fashion Domain" } methods: { @@ -49922,15 +50476,20 @@ pr_id_to_video: { url: "https://github.com/jayleicn/ClipBERT" owner: "jayleicn" framework: FRAMEWORK_PYTORCH - number_of_stars: 327 + number_of_stars: 381 description: "[CVPR 2021 Best Student Paper Honorable Mention, Oral] Official PyTorch code for ClipBERT, an efficient framework for end-to-end learning on image-text and video-text tasks. 
" } + methods: { + name: "ClipBERT" + full_name: "ClipBERT" + description: "**ClipBERT** is a framework for end-to-end-learning for video-and-language tasks, by employing sparse sampling, where only a single or a few sparsely sampled short clips from a video are used at each training step. Two aspects distinguish ClipBERT from previous work. \r\n\r\nFirst, in contrast to densely extracting video features (adopted by most existing methods), CLIPBERT sparsely samples only one single or a few short clips from the full-length videos at each training step. The hypothesis is that visual features from sparse clips already capture key visual and semantic information in the video, as consecutive clips usually contain similar semantics from a continuous scene. Thus, a handful of clips are sufficient for training, instead of using the full video. Then, predictions from multiple densely-sampled clips are aggregated to obtain the final video-level prediction during inference, which is less computational demanding. \r\n\r\nThe second differentiating aspect concerns the initialization of model weights (i.e., transfer through pre-training). The authors use 2D architectures (e.g., ResNet-50) instead of 3D features as the visual backbone for video encoding, allowing them to harness the power of image-text pretraining for video-text understanding along with the advantages of low memory cost and runtime efficiency." + } } video: { video_id: "Kgh88DLHHTo" video_title: "PR-325: Pixel-BERT: Aligning Image Pixels with Text by Deep Multi-Modal Transformers" - number_of_likes: 6 - number_of_views: 235 + number_of_likes: 8 + number_of_views: 326 published_date: { seconds: 1623654539 } @@ -49957,7 +50516,7 @@ pr_id_to_video: { url: "https://github.com/vturrisi/solo-learn" owner: "vturrisi" framework: FRAMEWORK_PYTORCH - number_of_stars: 47 + number_of_stars: 289 description: "solo-learn: a library of self-supervised methods for visual representation learning powered by Pytorch Lightning" } } @@ -50019,7 +50578,7 @@ pr_id_to_video: { url: "https://github.com/YannDubs/lossyless" owner: "YannDubs" framework: FRAMEWORK_PYTORCH - number_of_stars: 5 + number_of_stars: 12 description: "Generic image compressor for machine learning. Pytorch code for our paper \"Lossy compression for lossless prediction\"." 
} } @@ -50041,15 +50600,15 @@ pr_id_to_video: { url: "https://github.com/joshr17/IFM" owner: "joshr17" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 + number_of_stars: 10 description: "Code for paper \"Can contrastive learning avoid shortcut solutions?\"" } } video: { video_id: "iQVvhLxGAt8" video_title: "PR-326: VICReg: Variance-Invariance-Covariance Regularization for Self-Supervised Learning" - number_of_likes: 15 - number_of_views: 317 + number_of_likes: 17 + number_of_views: 471 published_date: { seconds: 1624200774 } @@ -50132,7 +50691,7 @@ pr_id_to_video: { url: "https://github.com/fabricerosay/AlphaGPU" owner: "fabricerosay" framework: FRAMEWORK_OTHERS - number_of_stars: 11 + number_of_stars: 16 description: "Alphazero on GPU thanks to CUDA.jl" } methods: { @@ -50156,7 +50715,7 @@ pr_id_to_video: { url: "https://github.com/openai/improved-diffusion" owner: "openai" framework: FRAMEWORK_PYTORCH - number_of_stars: 147 + number_of_stars: 168 description: "Release for Improved Denoising Diffusion Probabilistic Models" } } @@ -50188,8 +50747,8 @@ pr_id_to_video: { video: { video_id: "2irqTp-3hQ0" video_title: "PR-327: Scaling Laws for Autoregressive Generative Modeling" - number_of_likes: 5 - number_of_views: 212 + number_of_likes: 8 + number_of_views: 304 published_date: { seconds: 1624806800 } @@ -50212,36 +50771,18 @@ pr_id_to_video: { authors: "Johannes Ballé" authors: "Valero Laparra" authors: "Eero P. Simoncelli" - repositories: { - url: "https://github.com/ipc-lab/DWSIC" - owner: "ipc-lab" - framework: FRAMEWORK_PYTORCH - number_of_stars: 3 - description: "Deep Stereo Image Compression with Decoder Side Information Using Wyner Common Information" - } - repositories: { - url: "https://github.com/gergely-flamich/relative-entropy-coding" - owner: "gergely-flamich" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 8 - } - repositories: { - url: "https://github.com/faymek/compression" - owner: "faymek" - framework: FRAMEWORK_TENSORFLOW - } repositories: { url: "https://github.com/tensorflow/compression" owner: "tensorflow" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 510 + number_of_stars: 516 description: "Data compression in TensorFlow" } repositories: { url: "https://github.com/jorge-pessoa/pytorch-gdn" owner: "jorge-pessoa" framework: FRAMEWORK_PYTORCH - number_of_stars: 50 + number_of_stars: 51 description: "PyTorch implementation of the Generalized divisive normalization non-linearity layer" } repositories: { @@ -50262,16 +50803,34 @@ pr_id_to_video: { url: "https://github.com/liujiaheng/iclr_17_compression" owner: "liujiaheng" framework: FRAMEWORK_PYTORCH - number_of_stars: 39 + number_of_stars: 42 description: "End-to-end optimized image compression" } + repositories: { + url: "https://github.com/gergely-flamich/relative-entropy-coding" + owner: "gergely-flamich" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 8 + } repositories: { url: "https://github.com/FireFYF/modulatedautoencoder" owner: "FireFYF" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 5 + number_of_stars: 8 description: "Variable rate with MAE" } + repositories: { + url: "https://github.com/faymek/compression" + owner: "faymek" + framework: FRAMEWORK_TENSORFLOW + } + repositories: { + url: "https://github.com/ipc-lab/DWSIC" + owner: "ipc-lab" + framework: FRAMEWORK_PYTORCH + number_of_stars: 3 + description: "Deep Stereo Image Compression with Decoder Side Information Using Wyner Common Information" + } } papers: { paper_id: "end-to-end-optimized-image-compression-for-1" @@ -50304,8 +50863,8 
@@ pr_id_to_video: { video: { video_id: "rtuJqQDWmIA" video_title: "PR-328: End-to-End Optimized Image Compression" - number_of_likes: 4 - number_of_views: 200 + number_of_likes: 9 + number_of_views: 314 published_date: { seconds: 1624902283 } @@ -50377,20 +50936,6 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Mario Lucic" authors: "Alexey Dosovitskiy" - repositories: { - url: "https://github.com/SauravMaheshkar/MLP-Mixer" - owner: "SauravMaheshkar" - framework: FRAMEWORK_TENSORFLOW - number_of_stars: 4 - description: "Minimal Flax implementation of MLP-Mixer from \"MLP-Mixer: An all-MLP Architecture for Vision\" (https://arxiv.org/abs/2105.01601)" - } - repositories: { - url: "https://github.com/rwightman/pytorch-image-models" - owner: "rwightman" - framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 - description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" - } repositories: { url: "https://github.com/sekilab/image_processing_bootcamp2021" owner: "sekilab" @@ -50398,25 +50943,28 @@ pr_id_to_video: { number_of_stars: 2 } repositories: { - is_official: true - url: "https://github.com/google-research/vision_transformer" - owner: "google-research" - framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + url: "https://github.com/Benjamin-Etheredge/mlp-mixer-keras" + owner: "Benjamin-Etheredge" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 6 } repositories: { - url: "https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/mlp_mixer" - owner: "transformers" + url: "https://github.com/imad08/MLP-Mixer" + owner: "imad08" framework: FRAMEWORK_PYTORCH - number_of_stars: 3213 - description: "🧠 Implementations/tutorials of deep learning papers with side-by-side notes; including transformers (original, xl, switch, feedback), optimizers(adam, radam, adabelief), gans(dcgan, cyclegan, stylegan2), reinforcement learning (ppo, dqn), capsnet, sketch-rnn, etc." } repositories: { - url: "https://github.com/bangoc123/mlp-mixer" - owner: "bangoc123" + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 + } + repositories: { + url: "https://github.com/leondgarse/Keras_mlp" + owner: "leondgarse" framework: FRAMEWORK_TENSORFLOW - number_of_stars: 56 - description: "Implementation for paper MLP-Mixer: An all-MLP Architecture for Vision" + number_of_stars: 4 + description: "Keras implementation of mlp-mixer, ResMLP. imagenet/imagenet21k weights reloaded." } repositories: { url: "https://github.com/lavish619/MLP-Mixer-PyTorch" @@ -50431,29 +50979,36 @@ pr_id_to_video: { framework: FRAMEWORK_PYTORCH description: "Pytorch implementation of MLP Mixer" } - repositories: { - url: "https://github.com/04RR/SOTA-Vision" - owner: "04RR" - framework: FRAMEWORK_PYTORCH - number_of_stars: 6 - description: "Implementation of various state of the art architectures used in computer vision. " - } repositories: { url: "https://github.com/sayakpaul/MLP-Mixer-CIFAR10" owner: "sayakpaul" framework: FRAMEWORK_OTHERS - number_of_stars: 32 + number_of_stars: 34 description: "Implements MLP-Mixer (https://arxiv.org/abs/2105.01601) with the CIFAR-10 dataset. 
" } + repositories: { + url: "https://github.com/rishikksh20/MLP-Mixer-pytorch" + owner: "rishikksh20" + framework: FRAMEWORK_PYTORCH + number_of_stars: 137 + description: "Unofficial implementation of MLP-Mixer: An all-MLP Architecture for Vision" + } + repositories: { + url: "https://github.com/isaaccorley/mlp-mixer-pytorch" + owner: "isaaccorley" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "PyTorch implementation of \"MLP-Mixer: An all-MLP Architecture for Vision\" Tolstikhin et al. (2021)" + } methods: { - name: "Residual Connection" - full_name: "Residual Connection" - description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." } methods: { - name: "Layer Normalization" - full_name: "Layer Normalization" - description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. 
More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" } methods: { name: "Dense Connections" @@ -50465,42 +51020,42 @@ pr_id_to_video: { full_name: "Label Smoothing" description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
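Global average pooling as described in the entry above reduces each feature map to a single confidence value. A one-line NumPy sketch with hypothetical shapes (10 maps, one per category, each 7×7):

```python
import numpy as np

feature_maps = np.random.randn(10, 7, 7)  # 10 feature maps from the last mlpconv layer
pooled = feature_maps.mean(axis=(1, 2))   # one scalar per map: the category-confidence vector
print(pooled.shape)                       # (10,) -- fed directly into the softmax
```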
+ } methods: { name: "Transformer" full_name: "Transformer" description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." } methods: { - name: "Scaled Dot-Product Attention" - full_name: "Scaled Dot-Product Attention" - description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." - } - methods: { - name: "Softmax" - full_name: "Softmax" - description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}wk}} $$" + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" } methods: { - name: "Vision Transformer" - full_name: "Vision Transformer" - description: "The **Vision Transformer** is a model for image classification that employs a Transformer-like architecture over patches of the image." 
+ name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." } methods: { - name: "Multi-Head Attention" - full_name: "Multi-Head Attention" - description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." 
} methods: { - name: "Adam" - full_name: "Adam" - description: "**Adam** is an adaptive learning rate optimization algorithm that utilises both momentum and scaling, combining the benefits of [RMSProp](https://paperswithcode.com/method/rmsprop) and [SGD w/th Momentum](https://paperswithcode.com/method/sgd-with-momentum). The optimizer is designed to be appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. \r\n\r\nThe weight updates are performed as:\r\n\r\n$$ w_{t} = w_{t-1} - \\eta\\frac{\\hat{m}\\_{t}}{\\sqrt{\\hat{v}\\_{t}} + \\epsilon} $$\r\n\r\nwith\r\n\r\n$$ \\hat{m}\\_{t} = \\frac{m_{t}}{1-\\beta^{t}_{1}} $$\r\n\r\n$$ \\hat{v}\\_{t} = \\frac{v_{t}}{1-\\beta^{t}_{2}} $$\r\n\r\n$$ m_{t} = \\beta_{1}m_{t-1} + (1-\\beta_{1})g_{t} $$\r\n\r\n$$ v_{t} = \\beta_{2}v_{t-1} + (1-\\beta_{2})g_{t}^{2} $$\r\n\r\n\r\n$ \\eta $ is the step size/learning rate, around 1e-3 in the original paper. $ \\epsilon $ is a small number, typically 1e-8 or 1e-10, to prevent dividing by zero. $ \\beta_{1} $ and $ \\beta_{2} $ are forgetting parameters, with typical values 0.9 and 0.999, respectively." + name: "MLP-Mixer" + full_name: "MLP-Mixer" + description: "The **MLP-Mixer** architecture (or “Mixer” for short) is an image architecture that doesn't use convolutions or self-attention. Instead, Mixer’s architecture is based entirely on multi-layer perceptrons (MLPs) that are repeatedly applied across either spatial locations or feature channels. Mixer relies only on basic matrix multiplication routines, changes to data layout (reshapes and transpositions), and scalar nonlinearities.\r\n\r\nIt accepts a sequence of linearly projected image patches (also referred to as tokens) shaped as a “patches × channels” table as an input, and maintains this dimensionality. Mixer makes use of two types of MLP layers: channel-mixing MLPs and token-mixing MLPs. The channel-mixing MLPs allow communication between different channels; they operate on each token independently and take individual rows of the table as inputs. The token-mixing MLPs allow communication between different spatial locations (tokens); they operate on each channel independently and take individual columns of the table as inputs. These two types of layers are interleaved to enable interaction of both input dimensions." 
} } video: { video_id: "NicKVB-rpc8" video_title: "PR-329: Early Convolutions Help Transformers See Better" - number_of_likes: 5 - number_of_views: 190 + number_of_likes: 12 + number_of_views: 449 published_date: { seconds: 1625481694 } @@ -50527,25 +51082,31 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Lucas Beyer" repositories: { - url: "https://github.com/rstrudel/segmenter" - owner: "rstrudel" - framework: FRAMEWORK_PYTORCH - number_of_stars: 172 - description: "Official PyTorch implementation of Segmenter: Transformer for Semantic Segmentation" + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } repositories: { is_official: true url: "https://github.com/google-research/vision_transformer" owner: "google-research" framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + number_of_stars: 3306 + } + repositories: { + url: "https://github.com/rstrudel/segmenter" + owner: "rstrudel" + framework: FRAMEWORK_PYTORCH + number_of_stars: 194 + description: "Official PyTorch implementation of Segmenter: Transformer for Semantic Segmentation" } repositories: { is_official: true url: "https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } } @@ -50564,25 +51125,31 @@ pr_id_to_video: { authors: "Jakob Uszkoreit" authors: "Lucas Beyer" repositories: { - url: "https://github.com/rstrudel/segmenter" - owner: "rstrudel" - framework: FRAMEWORK_PYTORCH - number_of_stars: 172 - description: "Official PyTorch implementation of Segmenter: Transformer for Semantic Segmentation" + url: "https://github.com/ttt496/VisionTransformer" + owner: "ttt496" + framework: FRAMEWORK_OTHERS + number_of_stars: 1 } repositories: { is_official: true url: "https://github.com/google-research/vision_transformer" owner: "google-research" framework: FRAMEWORK_OTHERS - number_of_stars: 3100 + number_of_stars: 3306 + } + repositories: { + url: "https://github.com/rstrudel/segmenter" + owner: "rstrudel" + framework: FRAMEWORK_PYTORCH + number_of_stars: 194 + description: "Official PyTorch implementation of Segmenter: Transformer for Semantic Segmentation" } repositories: { is_official: true url: "https://github.com/rwightman/pytorch-image-models" owner: "rwightman" framework: FRAMEWORK_PYTORCH - number_of_stars: 11591 + number_of_stars: 12196 description: "PyTorch image models, scripts, pretrained weights -- ResNet, ResNeXT, EfficientNet, EfficientNetV2, NFNet, Vision Transformer, MixNet, MobileNet-V3/V2, RegNet, DPN, CSPNet, and more" } } @@ -50601,35 +51168,24 @@ pr_id_to_video: { authors: "Alexandre Sablayrolles" authors: "Hervé Jégou" repositories: { - url: "https://github.com/PaddlePaddle/PaddleClas" - owner: "PaddlePaddle" - framework: FRAMEWORK_OTHERS - number_of_stars: 2085 - description: "A treasure chest for visual recognition powered by PaddlePaddle" - } - repositories: { - url: "https://github.com/bshantam97/Attention_Based_Networks" - owner: "bshantam97" - framework: FRAMEWORK_PYTORCH - } - repositories: { - url: "https://github.com/tianhai123/vit-pytorch" - owner: "tianhai123" + url: "https://github.com/huggingface/transformers" + owner: "huggingface" framework: FRAMEWORK_PYTORCH - number_of_stars: 2 + number_of_stars: 49984 + description: "🤗 
Transformers: State-of-the-art Natural Language Processing for Pytorch, TensorFlow, and JAX." } repositories: { - url: "https://github.com/lucidrains/vit-pytorch" - owner: "lucidrains" + url: "https://github.com/UdbhavPrasad072300/Transformer-Implementations" + owner: "UdbhavPrasad072300" framework: FRAMEWORK_PYTORCH - number_of_stars: 5023 - description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + number_of_stars: 18 + description: "Library - Vanilla, ViT, DeiT, BERT, GPT" } repositories: { url: "https://github.com/TACJu/TransFG" owner: "TACJu" framework: FRAMEWORK_PYTORCH - number_of_stars: 117 + number_of_stars: 125 description: "This is the official PyTorch implementation of the paper \"TransFG: A Transformer Architecture for Fine-grained Recognition\" (Ju He, Jie-Neng Chen, Shuai Liu, Adam Kortylewski, Cheng Yang, Yutong Bai, Changhu Wang, Alan Yuille)." } repositories: { @@ -50637,15 +51193,40 @@ pr_id_to_video: { url: "https://github.com/facebookresearch/deit" owner: "facebookresearch" framework: FRAMEWORK_PYTORCH - number_of_stars: 1967 + number_of_stars: 2047 description: "Official DeiT repository" } repositories: { - url: "https://github.com/UdbhavPrasad072300/Transformer-Implementations" - owner: "UdbhavPrasad072300" + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } + repositories: { + url: "https://github.com/PaddlePaddle/PaddleClas" + owner: "PaddlePaddle" + framework: FRAMEWORK_OTHERS + number_of_stars: 2166 + description: "A treasure chest for visual recognition powered by PaddlePaddle" + } + repositories: { + url: "https://github.com/tianhai123/vit-pytorch" + owner: "tianhai123" + framework: FRAMEWORK_PYTORCH + number_of_stars: 2 + } + repositories: { + url: "https://github.com/bshantam97/Attention_Based_Networks" + owner: "bshantam97" framework: FRAMEWORK_PYTORCH - number_of_stars: 17 - description: "Library - Vanilla, ViT, DeiT, BERT, GPT" + } + repositories: { + url: "https://github.com/cogtoolslab/physics-benchmarking-neurips2021" + owner: "cogtoolslab" + framework: FRAMEWORK_OTHERS + number_of_stars: 9 + description: "Repo for \"Physion: Evaluating Physical Prediction from Vision in Humans and Machines\" submission to NeurIPS 2021 (Datasets & Benchmarks track)" } methods: { name: "Attention Dropout" @@ -50701,8 +51282,8 @@ pr_id_to_video: { video: { video_id: "A3RrAIx-KCc" video_title: "PR-330: How To Train Your ViT? Data, Augmentation, and Regularization in Vision Transformers" - number_of_likes: 20 - number_of_views: 405 + number_of_likes: 32 + number_of_views: 753 published_date: { seconds: 1626015401 } @@ -50743,48 +51324,6 @@ pr_id_to_video: { authors: "Geoff Pleiss" authors: "Yu Sun" authors: "Kilian Q. 
Weinberger" - repositories: { - url: "https://github.com/hollance/reliability-diagrams" - owner: "hollance" - framework: FRAMEWORK_PYTORCH - number_of_stars: 30 - description: "Reliability diagrams visualize whether a classifier model needs calibration" - } - repositories: { - url: "https://github.com/sleep3r/garrus" - owner: "sleep3r" - framework: FRAMEWORK_OTHERS - number_of_stars: 13 - description: "Python framework for high quality confidence estimation of deep neural networks, providing methods such as confidence calibration and ordinal ranking" - } - repositories: { - url: "https://github.com/bayesgroup/pytorch-ensembles" - owner: "bayesgroup" - framework: FRAMEWORK_PYTORCH - number_of_stars: 142 - description: "Pitfalls of In-Domain Uncertainty Estimation and Ensembling in Deep Learning, ICLR 2020" - } - repositories: { - url: "https://github.com/artnitolog/diary" - owner: "artnitolog" - framework: FRAMEWORK_OTHERS - description: "Accompanying repository for the 3rd year corsework. CMC MSU, MMF, 2020-2021." - } - repositories: { - url: "https://github.com/johntd54/stanford_car" - owner: "johntd54" - framework: FRAMEWORK_PYTORCH - number_of_stars: 5 - description: "Classification model for fine-grained visual classification on the Stanford Car dataset." - } - repositories: { - is_official: true - url: "https://github.com/gpleiss/temperature_scaling" - owner: "gpleiss" - framework: FRAMEWORK_PYTORCH - number_of_stars: 565 - description: "A simple way to calibrate your neural network." - } repositories: { url: "https://github.com/AnanyaKumar/verified_calibration" owner: "AnanyaKumar" @@ -50801,7 +51340,7 @@ pr_id_to_video: { url: "https://github.com/Jonathan-Pearce/calibration_library" owner: "Jonathan-Pearce" framework: FRAMEWORK_PYTORCH - number_of_stars: 7 + number_of_stars: 9 description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" } repositories: { @@ -50811,6 +51350,45 @@ pr_id_to_video: { number_of_stars: 32 description: "Code for the 2018 EMNLP Interpretability Workshop Paper \"Interpreting Neural Networks with Nearest Neighbors\"" } + repositories: { + url: "https://github.com/Jonathan-Pearce/cnn_calibration" + owner: "Jonathan-Pearce" + framework: FRAMEWORK_PYTORCH + number_of_stars: 9 + description: "Pytorch library for model calibration metrics and visualizations as well as recalibration methods. In progress!" + } + repositories: { + url: "https://github.com/cpark321/uncertainty-deep-learning" + owner: "cpark321" + framework: FRAMEWORK_PYTORCH + number_of_stars: 98 + } + repositories: { + url: "https://github.com/cpark321/bayesian-neural-networks" + owner: "cpark321" + framework: FRAMEWORK_PYTORCH + number_of_stars: 98 + } + repositories: { + url: "https://github.com/aigen/df-posthoc-calibration" + owner: "aigen" + framework: FRAMEWORK_OTHERS + number_of_stars: 6 + description: "Model-agnostic posthoc calibration without distributional assumptions" + } + repositories: { + url: "https://github.com/sirius8050/Expected-Calibration-Error" + owner: "sirius8050" + framework: FRAMEWORK_OTHERS + number_of_stars: 6 + } + repositories: { + url: "https://github.com/ondrejba/tf_calibrate" + owner: "ondrejba" + framework: FRAMEWORK_TENSORFLOW + number_of_stars: 2 + description: "Calibration of neural networks in Tensorflow." 
+ } } papers: { paper_id: "revisiting-the-calibration-of-modern-neural" @@ -50832,8 +51410,8 @@ pr_id_to_video: { video: { video_id: "rI-vJuNKyIU" video_title: "PR-331: Revisiting the Calibration of Modern Neural Networks" - number_of_likes: 7 - number_of_views: 161 + number_of_likes: 10 + number_of_views: 312 published_date: { seconds: 1626015278 } @@ -50841,3 +51419,812 @@ } } } +pr_id_to_video: { + key: 332 + value: { + pr_id: 332 + papers: { + paper_id: "provably-consistent-partial-label-learning" + title: "Provably Consistent Partial-Label Learning" + arxiv_id: "2007.08929" + abstract: "Partial-label learning (PLL) is a multi-class classification problem, where each training example is associated with a set of candidate labels. Even though many practical PLL methods have been proposed in the last two decades, there lacks a theoretical understanding of the consistency of those methods-none of the PLL methods hitherto possesses a generation process of candidate label sets, and then it is still unclear why such a method works on a specific dataset and when it may fail given a different dataset. In this paper, we propose the first generation model of candidate label sets, and develop two novel PLL methods that are guaranteed to be provably consistent, i.e., one is risk-consistent and the other is classifier-consistent. Our methods are advantageous, since they are compatible with any deep network or stochastic optimizer. Furthermore, thanks to the generation model, we would be able to answer the two questions above by testing if the generation model matches given candidate label sets. Experiments on benchmark and real-world datasets validate the effectiveness of the proposed generation model and two PLL methods." + published_date: { + seconds: 1594944000 + } + authors: "Lei Feng" + authors: "Jiaqi Lv" + authors: "Bo Han" + authors: "Miao Xu" + authors: "Gang Niu" + authors: "Xin Geng" + authors: "Bo An" + authors: "Masashi Sugiyama" + } + papers: { + paper_id: "a-semi-supervised-two-stage-approach-to" + title: "A Semi-Supervised Two-Stage Approach to Learning from Noisy Labels" + arxiv_id: "1802.02679" + abstract: "The recent success of deep neural networks is powered in part by large-scale well-labeled training data. However, it is a daunting task to laboriously annotate an ImageNet-like dataset. On the contrary, it is fairly convenient, fast, and cheap to collect training images from the Web along with their noisy labels. This signifies the need of alternative approaches to training deep neural networks using such noisy labels. Existing methods tackling this problem either try to identify and correct the wrong labels or reweigh the data terms in the loss function according to the inferred noisy rates. Both strategies inevitably incur errors for some of the data points. In this paper, we contend that it is actually better to ignore the labels of some of the data points than to keep them if the labels are incorrect, especially when the noisy rate is high. After all, the wrong labels could mislead a neural network to a bad local optimum. We suggest a two-stage framework for the learning from noisy labels. In the first stage, we identify a small portion of images from the noisy training set of which the labels are correct with a high probability. The noisy labels of the other images are ignored. In the second stage, we train a deep neural network in a semi-supervised manner.
This framework effectively takes advantage of the whole training set and yet only a portion of its labels that are most likely correct. Experiments on three datasets verify the effectiveness of our approach especially when the noisy rate is high." + published_date: { + seconds: 1518048000 + } + authors: "Yifan Ding" + authors: "Liqiang Wang" + authors: "Deliang Fan" + authors: "Boqing Gong" + } + papers: { + paper_id: "they-are-not-completely-useless-towards" + title: "They are Not Completely Useless: Towards Recycling Transferable Unlabeled Data for Class-Mismatched Semi-Supervised Learning" + arxiv_id: "2011.13529" + abstract: "Semi-Supervised Learning (SSL) with mismatched classes deals with the problem that the classes-of-interests in the limited labeled data is only a subset of the classes in massive unlabeled data. As a result, the classes only possessed by the unlabeled data may mislead the classifier training and thus hindering the realistic landing of various SSL methods. To solve this problem, existing methods usually divide unlabeled data to in-distribution (ID) data and out-of-distribution (OOD) data, and directly discard or weaken the OOD data to avoid their adverse impact. In other words, they treat OOD data as completely useless and thus the potential valuable information for classification contained by them is totally ignored. To remedy this defect, this paper proposes a \"Transferable OOD data Recycling\" (TOOR) method which properly utilizes ID data as well as the \"recyclable\" OOD data to enrich the information for conducting class-mismatched SSL. Specifically, TOOR firstly attributes all unlabeled data to ID data or OOD data, among which the ID data are directly used for training. Then we treat the OOD data that have a close relationship with ID data and labeled data as recyclable, and employ adversarial domain adaptation to project them to the space of ID data and labeled data. In other words, the recyclability of an OOD datum is evaluated by its transferability, and the recyclable OOD data are transferred so that they are compatible with the distribution of known classes-of-interests. Consequently, our TOOR method extracts more information from unlabeled data than existing approaches, so it can achieve the improved performance which is demonstrated by the experiments on typical benchmark datasets." + published_date: { + seconds: 1606435200 + } + authors: "Zhuo Huang" + authors: "Ying Tai" + authors: "Chengjie Wang" + authors: "Jian Yang" + authors: "Chen Gong" + } + video: { + video_id: "-5fFL68d7Gg" + video_title: "PR-332:Deep Discriminative CNN with Temporal Ensembling for Ambiguously Labeled Image Classification" + number_of_likes: 3 + number_of_views: 111 + published_date: { + seconds: 1626617399 + } + uploader: "yunssun" + } + } +} +pr_id_to_video: { + key: 333 + value: { + pr_id: 333 + papers: { + paper_id: "synthesizer-rethinking-self-attention-in" + title: "Synthesizer: Rethinking Self-Attention in Transformer Models" + arxiv_id: "2005.00743" + abstract: "The dot product self-attention is known to be central and indispensable to state-of-the-art Transformer models. But is it really required? This paper investigates the true importance and contribution of the dot product-based self-attention mechanism on the performance of Transformer models. 
Via extensive experiments, we find that (1) random alignment matrices surprisingly perform quite competitively and (2) learning attention weights from token-token (query-key) interactions is useful but not that important after all. To this end, we propose \\textsc{Synthesizer}, a model that learns synthetic attention weights without token-token interactions. In our experiments, we first show that simple Synthesizers achieve highly competitive performance when compared against vanilla Transformer models across a range of tasks, including machine translation, language modeling, text generation and GLUE/SuperGLUE benchmarks. When composed with dot product attention, we find that Synthesizers consistently outperform Transformers. Moreover, we conduct additional comparisons of Synthesizers against Dynamic Convolutions, showing that simple Random Synthesizer is not only $60\\%$ faster but also improves perplexity by a relative $3.5\\%$. Finally, we show that simple factorized Synthesizers can outperform Linformers on encoding only tasks." + published_date: { + seconds: 1588377600 + } + authors: "Yi Tay" + authors: "Dara Bahri" + authors: "Donald Metzler" + authors: "Da-Cheng Juan" + authors: "Zhe Zhao" + authors: "Che Zheng" + repositories: { + url: "https://github.com/10-zin/Synthesizer" + owner: "10-zin" + framework: FRAMEWORK_PYTORCH + number_of_stars: 44 + description: "A PyTorch implementation of the paper - \"Synthesizer: Rethinking Self-Attention in Transformer Models\"" + } + methods: { + name: "Factorized Random Synthesized Attention" + full_name: "Factorized Random Synthesized Attention" + description: "**Factorized Random Synthesized Attention**, introduced with the [Synthesizer](https://paperswithcode.com/method/synthesizer) architecture, is similar to [factorized dense synthesized attention](https://paperswithcode.com/method/factorized-dense-synthesized-attention) but for random synthesizers. Letting $R$ being a randomly initialized matrix, we factorize $R$ into low rank matrices $R\\_{1}, R\\_{2} \\in \\mathbb{R}^{l\\text{ x}k}$ in the attention function:\r\n\r\n$$ Y = \\text{Softmax}\\left(R\\_{1}R\\_{2}^{T}\\right)G\\left(X\\right) . $$\r\n\r\nHere $G\\left(.\\right)$ is a parameterized function that is equivalent to $V$ in [Scaled Dot-Product Attention](https://paperswithcode.com/method/scaled).\r\n\r\nFor each head, the factorization reduces the parameter costs from $l^{2}$ to $2\\left(lk\\right)$ where\r\n$k << l$ and hence helps prevent overfitting. In practice, we use a small value of $k = 8$.\r\n\r\nThe basic idea of a Random Synthesizer is to not rely on pairwise token interactions or any information from individual token but rather to learn a task-specific alignment that works well globally across many samples." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. 
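The factorized random formula just quoted is easy to exercise end to end. A minimal NumPy sketch, assuming $G\left(.\right)$ is a plain linear projection (shapes follow the quoted definition with $k = 8$; all variable names are ours):

```python
import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)  # subtract max for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
l, d, k = 32, 64, 8                    # sequence length, model dim, low rank

X = rng.normal(size=(l, d))
R1 = rng.normal(size=(l, k))           # the factorization stores 2*l*k values
R2 = rng.normal(size=(l, k))           # instead of l*l for a full random R
W_g = 0.1 * rng.normal(size=(d, d))    # assumed linear G(.), the value projection

Y = softmax(R1 @ R2.T) @ (X @ W_g)     # attention weights never look at X
print(Y.shape)                         # (32, 64)
```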
This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + methods: { + name: "Transformer" + full_name: "Transformer" + description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." + } + methods: { + name: "Synthesizer" + full_name: "Synthesizer" + description: "The **Synthesizer** is a model that learns synthetic attention weights without token-token interactions. Unlike [Transformers](https://paperswithcode.com/method/transformer), the model eschews dot product self-attention but also content-based self-attention altogether. Synthesizer learns to synthesize the self-alignment matrix instead of manually computing pairwise dot products. It is transformation-based, only relies on simple feed-forward layers, and completely dispenses with dot products and explicit token-token interactions. \r\n\r\nThis new module employed by the Synthesizer is called \"Synthetic Attention\": a new way of learning to attend without explicitly attending (i.e., without dot product attention or content-based attention). Instead, Synthesizer generate the alignment matrix independent of token-token dependencies." + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\right) = \left[\\text{head}\_{1},\dots,\\text{head}\_{h}\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\_{i} = \\text{Attention} \left(\\textbf{Q}\\textbf{W}\_{i}^{Q}, \\textbf{K}\\textbf{W}\_{i}^{K}, \\textbf{V}\\textbf{W}\_{i}^{V} \right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Random Synthesized Attention" + full_name: "Random Synthesized Attention" + description: "**Random Synthesized Attention** is a form of synthesized attention where the attention weights are not conditioned on any input tokens. Instead, the attention weights are initialized to random values. It was introduced with the [Synthesizer](https://paperswithcode.com/method/synthesizer) architecture. Random Synthesized Attention contrasts with [Dense Synthesized Attention](https://paperswithcode.com/method/dense-synthesized-attention) which conditions on each token independently, as opposed to pairwise token interactions in the vanilla Transformer model.\r\n\r\nLet $R$ be a randomly initialized matrix. Random Synthesized Attention is defined as:\r\n\r\n$$Y = \\text{Softmax}\left(R\right)G\left(X\right) $$\r\n\r\nwhere $R \in \mathbb{R}^{l \\text{ x } l}$. Notably, each head adds $l^{2}$ parameters to the overall network. The basic idea of the Random Synthesizer is to not rely on pairwise token interactions or any information from individual token but rather to learn a task-specific alignment that works well globally across many samples. This is a direct generalization of the recently proposed fixed self-attention patterns of [Raganato et al (2020)](https://arxiv.org/abs/2002.10260)." + } + methods: { + name: "Dense Synthesized Attention" + full_name: "Dense Synthesized Attention" + description: "**Dense Synthesized Attention**, introduced with the [Synthesizer](https://paperswithcode.com/method/synthesizer) architecture, is a type of synthetic attention mechanism that replaces the notion of [query-key-values](https://paperswithcode.com/method/scaled) in the self-attention module and directly synthesizes the alignment matrix instead. Dense attention is conditioned on each input token. The method accepts an input $X \in \mathbb{R}^{l\\text{ x }d}$ and produces an output of $Y \in \mathbb{R}^{l\\text{ x }d}$. Here $l$ refers to the sequence length and $d$ refers to the dimensionality of the model. We first adopt $F\left(.\right)$, a parameterized function, for projecting input $X\_{i}$ from $d$ dimensions to $l$ dimensions.\r\n\r\n$$B\_{i} = F\left(X\_{i}\right)$$\r\n\r\nwhere $F\left(.\right)$ is a parameterized function that maps $\mathbb{R}^{d}$ to $\mathbb{R}^{l}$ and $i$ is the $i$-th token of $X$. Intuitively, this can be interpreted as learning a token-wise projection to the sequence length $l$. Essentially, with this model, each token predicts weights for each token in the input sequence.
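A companion sketch of the dense variant being defined here, using the two-layer ReLU parameterization of $F\left(.\right)$ that the description gives just below; as above, $G\left(.\right)$ is assumed to be a linear projection and all names are ours:

```python
import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
l, d, h = 32, 64, 128                       # sequence length, model dim, hidden width

X = rng.normal(size=(l, d))
W1, b1 = 0.1 * rng.normal(size=(d, h)), np.zeros(h)
W2, b2 = 0.1 * rng.normal(size=(h, l)), np.zeros(l)
W_g = 0.1 * rng.normal(size=(d, d))         # assumed linear G(.)

B = np.maximum(X @ W1 + b1, 0.0) @ W2 + b2  # F(X): each token predicts l weights
Y = softmax(B) @ (X @ W_g)                  # no query-key dot product anywhere
print(B.shape, Y.shape)                     # (32, 32) (32, 64)
```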
In practice, a simple two layered feed-forward layer with ReLU activations for $F\left(.\right)$ is adopted:\r\n\r\n$$ F\left(X\right) = W\_{2}\left(\sigma\_{R}\left(W\_{1}\left(X\right) + b\_{1}\right)\right) + b\_{2}$$\r\n\r\nwhere $\sigma\_{R}$ is the ReLU activation function. Hence, $B$ is now of $\mathbb{R}^{l\\text{ x }l}$. Given $B$, we now compute:\r\n\r\n$$ Y = \\text{Softmax}\left(B\right)G\left(X\right) $$\r\n\r\nwhere $G\left(.\right)$ is another parameterized function of $X$ that is analogous to $V$ (value) in the standard [Transformer](https://paperswithcode.com/method/transformer) model. This approach eliminates the [dot product](https://paperswithcode.com/method/scaled) altogether by replacing $QK^{T}$ in standard Transformers with the synthesizing function $F\left(.\right)$." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\mathcal{F}({x}):=\mathcal{H}({x})-{x}$. The original mapping is recast into $\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Factorized Dense Synthesized Attention" + full_name: "Factorized Dense Synthesized Attention" + description: "**Factorized Dense Synthesized Attention** is a synthesized attention mechanism, similar to [dense synthesized attention](https://paperswithcode.com/method/dense-synthesized-attention), but we factorize the outputs to reduce parameters and prevent overfitting. It was proposed as part of the [Synthesizer](https://paperswithcode.com/method/synthesizer) architecture. The factorized variant of the dense synthesizer can be expressed as follows:\r\n\r\n$$A, B = F\_{A}\left(X\_{i}\right), F\_{B}\left(X\_{i}\right)$$\r\n\r\nwhere $F\_{A}\left(.\right)$ projects input $X\_{i}$ into $a$ dimensions, $F\_B\left(.\right)$ projects $X\_{i}$ to $b$ dimensions, and $a \\text{ x } b = l$. The output of the factorized module is now written as:\r\n\r\n$$ Y = \\text{Softmax}\left(C\right)G\left(X\right) $$\r\n\r\nwhere $C = H\_{A}\left(A\right) * H\_{B}\left(B\right)$, where $H\_{A}$, $H\_{B}$ are tiling functions and $C \in \mathbb{R}^{l \\text{ x } l}$. The tiling function simply duplicates the vector $k$ times, i.e., $\mathbb{R}^{l} \rightarrow \mathbb{R}^{lk}$. In this case, $H\_{A}\left(\right)$ is a projection of $\mathbb{R}^{a} \rightarrow \mathbb{R}^{ab}$ and $H\_{B}\left(\right)$ is a projection of $\mathbb{R}^{b} \rightarrow \mathbb{R}^{ba}$. To avoid having similar values within the same block, we compose the outputs of $H\_{A}$ and $H\_{B}$." + } + } + papers: { + paper_id: "refiner-refining-self-attention-for-vision" + title: "Refiner: Refining Self-attention for Vision Transformers" + arxiv_id: "2106.03714" + abstract: "Vision Transformers (ViTs) have shown competitive accuracy in image classification tasks compared with CNNs. Yet, they generally require much more data for model pre-training.
Most of recent works thus are dedicated to designing more complex architectures or training methods to address the data-efficiency issue of ViTs. However, few of them explore improving the self-attention mechanism, a key factor distinguishing ViTs from CNNs. Different from existing works, we introduce a conceptually simple scheme, called refiner, to directly refine the self-attention maps of ViTs. Specifically, refiner explores attention expansion that projects the multi-head attention maps to a higher-dimensional space to promote their diversity. Further, refiner applies convolutions to augment local patterns of the attention maps, which we show is equivalent to a distributed local attention: features are aggregated locally with learnable kernels and then globally aggregated with self-attention. Extensive experiments demonstrate that refiner works surprisingly well. Significantly, it enables ViTs to achieve 86% top-1 classification accuracy on ImageNet with only 81M parameters." + published_date: { + seconds: 1623024000 + } + authors: "Daquan Zhou" + authors: "Yujun Shi" + authors: "Bingyi Kang" + authors: "Weihao Yu" + authors: "Zihang Jiang" + authors: "Yuan Li" + authors: "Xiaojie Jin" + authors: "Qibin Hou" + authors: "Jiashi Feng" + repositories: { + is_official: true + url: "https://github.com/zhoudaquan/Refiner_ViT" + owner: "zhoudaquan" + framework: FRAMEWORK_PYTORCH + number_of_stars: 63 + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\right) = \left[\\text{head}\_{1},\dots,\\text{head}\_{h}\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\_{i} = \\text{Attention} \left(\\textbf{Q}\\textbf{W}\_{i}^{Q}, \\textbf{K}\\textbf{W}\_{i}^{K}, \\textbf{V}\\textbf{W}\_{i}^{V} \right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + video: { + video_id: "I9kQwMbpxuE" + video_title: "PR-333: Synthesizer: Rethinking Self-Attention for Transformer Models" + number_of_likes: 6 + number_of_views: 264 + published_date: { + seconds: 1626662307 + } + uploader: "Jeon Eddie" + } + } +} +pr_id_to_video: { + key: 334 + value: { + pr_id: 334 + papers: { + paper_id: "cmt-convolutional-neural-networks-meet-vision" + title: "CMT: Convolutional Neural Networks Meet Vision Transformers" + arxiv_id: "2107.06263" + abstract: "Vision transformers have been successfully applied to image recognition tasks due to their ability to capture long-range dependencies within an image. However, there are still gaps in both performance and computational cost between transformers and existing convolutional neural networks (CNNs).
In this paper, we aim to address this issue and develop a network that can outperform not only the canonical transformers, but also the high-performance convolutional models. We propose a new transformer based hybrid network by taking advantage of transformers to capture long-range dependencies, and of CNNs to model local features. Furthermore, we scale it to obtain a family of models, called CMTs, obtaining much better accuracy and efficiency than previous convolution and transformer based models. In particular, our CMT-S achieves 83.5% top-1 accuracy on ImageNet, while being 14x and 2x smaller on FLOPs than the existing DeiT and EfficientNet, respectively. The proposed CMT-S also generalizes well on CIFAR10 (99.2%), CIFAR100 (91.7%), Flowers (98.7%), and other challenging vision datasets such as COCO (44.3% mAP), with considerably less computational cost." + published_date: { + seconds: 1626134400 + } + authors: "Jianyuan Guo" + authors: "Kai Han" + authors: "Han Wu" + authors: "Chang Xu" + authors: "Yehui Tang" + authors: "Chunjing Xu" + authors: "Yunhe Wang" + repositories: { + url: "https://github.com/FlyEgle/CMT-pytorch" + owner: "FlyEgle" + framework: FRAMEWORK_PYTORCH + number_of_stars: 18 + } + repositories: { + url: "https://github.com/Nanxiaohu666/CMT" + owner: "Nanxiaohu666" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5 + description: "unofficial version" + } + repositories: { + url: "https://github.com/yuranusduke/CMT-Convolutional-NN-Meets-ViT" + owner: "yuranusduke" + framework: FRAMEWORK_PYTORCH + number_of_stars: 10 + description: "Pytorch unofficial implementation of CMT" + } + repositories: { + url: "https://github.com/wilile26811249/CMT_CNN-meet-Vision-Transformer" + owner: "wilile26811249" + framework: FRAMEWORK_PYTORCH + number_of_stars: 6 + description: "A PyTorch implementation of CMT based on paper CMT: Convolutional Neural Networks Meet Vision Transformers." + } + methods: { + name: "Feedforward Network" + full_name: "Feedforward Network" + description: "A **Feedforward Network**, or a **Multilayer Perceptron (MLP)**, is a neural network with solely densely connected layers. This is the classic neural network architecture of the literature. It consists of inputs $x$ passed through units $h$ (of which there can be many layers) to predict a target $y$. Activation functions are generally chosen to be non-linear to allow for flexible functional approximation.\r\n\r\nImage Source: Deep Learning, Goodfellow et al" + } + methods: { + name: "Sigmoid Activation" + full_name: "Sigmoid Activation" + description: "**Sigmoid Activations** are a type of activation function for neural networks:\r\n\r\n$$f\\left(x\\right) = \\frac{1}{\\left(1+\\exp\\left(-x\\right)\\right)}$$\r\n\r\nSome drawbacks of this activation that have been noted in the literature are: sharp damp gradients during backpropagation from deeper hidden layers to inputs, gradient saturation, and slow convergence." + } + methods: { + name: "Squeeze-and-Excitation Block" + full_name: "Squeeze-and-Excitation Block" + description: "The **Squeeze-and-Excitation Block** is an architectural unit designed to improve the representational power of a network by enabling it to perform dynamic channel-wise feature recalibration. 
The process is:\r\n\r\n- The block has a convolutional block as an input.\r\n- Each channel is \"squeezed\" into a single numeric value using average pooling.\r\n- A dense layer followed by a ReLU adds non-linearity and output channel complexity is reduced by a ratio.\r\n- Another dense layer followed by a sigmoid gives each channel a smooth gating function.\r\n- Finally, we weight each feature map of the convolutional block based on the side network; the \"excitation\"." + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\frac{QK^{T}}{\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \cdot k = \sum_{i=1}^{d_k} q_{i}k_{i}$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\sqrt{d_k}$." + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "DeiT" + full_name: "Data-efficient Image Transformer" + description: "A **Data-Efficient Image Transformer** is a type of Vision Transformer for image classification tasks. The model is trained using a teacher-student strategy specific to transformers. It relies on a distillation token ensuring that the student learns from the teacher through attention." + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification.
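The scaled dot-product formula above translates almost line for line into NumPy (shapes are illustrative):

```python
import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)  # stabilize before exponentiating
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

def scaled_dot_product_attention(Q, K, V):
    d_k = Q.shape[-1]
    scores = Q @ K.T / np.sqrt(d_k)          # rescale so scores keep variance ~1
    return softmax(scores) @ V

rng = np.random.default_rng(0)
Q, K, V = (rng.normal(size=(10, 64)) for _ in range(3))
print(scaled_dot_product_attention(Q, K, V).shape)  # (10, 64)
```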
Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \mid{x}) = \frac{e^{x^{T}w_{j}}}{\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$" + } + methods: { + name: "1x1 Convolution" + full_name: "1x1 Convolution" + description: "A **1 x 1 Convolution** is a convolution with some special properties in that it can be used for dimensionality reduction, efficient low dimensional embeddings, and applying non-linearity after convolutions. It maps an input pixel with all its channels to an output pixel which can be squeezed to a desired output depth. It can be viewed as an [MLP](https://paperswithcode.com/method/feedforward-network) looking at a particular pixel location.\r\n\r\nImage Credit: [http://deeplearning.ai](http://deeplearning.ai)" + } + methods: { + name: "ReLU" + full_name: "Rectified Linear Units" + description: "**Rectified Linear Units**, or **ReLUs**, are a type of activation function that are linear in the positive dimension, but zero in the negative dimension. The kink in the function is the source of the non-linearity. Linearity in the positive dimension has the attractive property that it prevents non-saturation of gradients (contrast with [sigmoid activations](https://paperswithcode.com/method/sigmoid-activation)), although for half of the real line its gradient is zero.\r\n\r\n$$ f\left(x\right) = \max\left(0, x\right) $$" + } + } + papers: { + paper_id: "cvt-introducing-convolutions-to-vision" + title: "CvT: Introducing Convolutions to Vision Transformers" + arxiv_id: "2103.15808" + abstract: "We present in this paper a new architecture, named Convolutional vision Transformer (CvT), that improves Vision Transformer (ViT) in performance and efficiency by introducing convolutions into ViT to yield the best of both designs. This is accomplished through two primary modifications: a hierarchy of Transformers containing a new convolutional token embedding, and a convolutional Transformer block leveraging a convolutional projection. These changes introduce desirable properties of convolutional neural networks (CNNs) to the ViT architecture (\ie shift, scale, and distortion invariance) while maintaining the merits of Transformers (\ie dynamic attention, global context, and better generalization). We validate CvT by conducting extensive experiments, showing that this approach achieves state-of-the-art performance over other Vision Transformers and ResNets on ImageNet-1k, with fewer parameters and lower FLOPs. In addition, performance gains are maintained when pretrained on larger datasets (\eg ImageNet-22k) and fine-tuned to downstream tasks. Pre-trained on ImageNet-22k, our CvT-W24 obtains a top-1 accuracy of 87.7\% on the ImageNet-1k val set. Finally, our results show that the positional encoding, a crucial component in existing Vision Transformers, can be safely removed in our model, simplifying the design for higher resolution vision tasks. Code will be released at \url{https://github.com/leoxiaobin/CvT}."
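The 1x1 convolution description above ("an MLP looking at a particular pixel location") can be verified in a few lines: a 1x1 convolution over an H x W x C_in tensor is exactly a dense layer applied independently at every pixel. A small NumPy sketch (dimensions are ours):

```python
import numpy as np

rng = np.random.default_rng(0)
H, W, C_in, C_out = 8, 8, 32, 16

x = rng.normal(size=(H, W, C_in))
w = rng.normal(size=(C_in, C_out))   # one weight per (input channel, output channel)

# 1x1 convolution == the same dense layer at every pixel location
y = (x.reshape(-1, C_in) @ w).reshape(H, W, C_out)
print(y.shape)  # (8, 8, 16)
```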
+ published_date: { + seconds: 1616976000 + } + authors: "Haiping Wu" + authors: "Bin Xiao" + authors: "Noel Codella" + authors: "Mengchen Liu" + authors: "Xiyang Dai" + authors: "Lu Yuan" + authors: "Lei Zhang" + repositories: { + url: "https://github.com/microsoft/esvit" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 118 + description: "EsViT: Efficient self-supervised Vision Transformers" + } + repositories: { + url: "https://github.com/leoxiaobin/CvT" + owner: "leoxiaobin" + framework: FRAMEWORK_PYTORCH + number_of_stars: 53 + description: "This is an official implementation of CvT: Introducing Convolutions to Vision Transformers." + } + repositories: { + url: "https://github.com/rishikksh20/convolution-vision-transformers" + owner: "rishikksh20" + framework: FRAMEWORK_PYTORCH + number_of_stars: 142 + description: "PyTorch Implementation of CvT: Introducing Convolutions to Vision Transformers" + } + repositories: { + url: "https://github.com/lucidrains/vit-pytorch" + owner: "lucidrains" + framework: FRAMEWORK_PYTORCH + number_of_stars: 5337 + description: "Implementation of Vision Transformer, a simple way to achieve SOTA in vision classification with only a single transformer encoder, in Pytorch" + } + repositories: { + is_official: true + url: "https://github.com/microsoft/CvT" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 130 + description: "This is an official implementation of CvT: Introducing Convolutions to Vision Transformers." + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\frac{QK^{T}}{\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \cdot k = \sum_{i=1}^{d_k} q_{i}k_{i}$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\sqrt{d_k}$." + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\mathcal{F}({x}):=\mathcal{H}({x})-{x}$. The original mapping is recast into $\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g.
longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Transformer" + full_name: "Transformer" + description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." + } + methods: { + name: "BPE" + full_name: "Byte Pair Encoding" + description: "**Byte Pair Encoding**, or **BPE**, is a subword segmentation algorithm that encodes rare and unknown words as sequences of subword units. The intuition is that various word classes are translatable via smaller units than words, for instance names (via character copying or transliteration), compounds (via compositional translation), and cognates and loanwords (via phonological and morphological transformations).\r\n\r\n[Lei Mao](https://leimao.github.io/blog/Byte-Pair-Encoding/) has a detailed blog post that explains how this works." + } + methods: { + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. 
$w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Pointwise Convolution" + full_name: "Pointwise Convolution" + description: "**Pointwise Convolution** is a type of convolution that uses a 1x1 kernel: a kernel that iterates through every single point. This kernel has a depth of however many channels the input image has. It can be used in conjunction with [depthwise convolutions](https://paperswithcode.com/method/depthwise-convolution) to produce an efficient class of convolutions known as [depthwise-separable convolutions](https://paperswithcode.com/method/depthwise-separable-convolution).\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + } + papers: { + paper_id: "ba-2m-a-batch-aware-attention-module-for" + title: "BA^2M: A Batch Aware Attention Module for Image Classification" + arxiv_id: "2103.15099" + abstract: "The attention mechanisms have been employed in Convolutional Neural Network (CNN) to enhance the feature representation. However, existing attention mechanisms only concentrate on refining the features inside each sample and neglect the discrimination between different samples. In this paper, we propose a batch aware attention module (BA2M) for feature enrichment from a distinctive perspective. More specifically, we first get the sample-wise attention representation (SAR) by fusing the channel, local spatial and global spatial attention maps within each sample. Then, we feed the SARs of the whole batch to a normalization function to get the weights for each sample. 
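Putting the depthwise and pointwise descriptions above together, a deliberately naive NumPy sketch of a depthwise-separable convolution (loops kept for clarity; kernel size, padding, and all names are ours):

```python
import numpy as np

def depthwise_separable_conv(x, dw_kernels, pw_weights):
    """Depthwise k x k (one filter per channel), then pointwise 1x1."""
    H, W, C_in = x.shape
    k = dw_kernels.shape[0]
    pad = k // 2
    xp = np.pad(x, ((pad, pad), (pad, pad), (0, 0)))
    dw = np.zeros_like(x)
    for i in range(H):                       # depthwise: channels stay separate
        for j in range(W):
            patch = xp[i:i + k, j:j + k, :]  # (k, k, C_in)
            dw[i, j, :] = (patch * dw_kernels).sum(axis=(0, 1))
    # pointwise: a 1x1 convolution mixes channels at every position
    return (dw.reshape(-1, C_in) @ pw_weights).reshape(H, W, -1)

rng = np.random.default_rng(0)
x = rng.normal(size=(8, 8, 4))
out = depthwise_separable_conv(x, rng.normal(size=(3, 3, 4)), rng.normal(size=(4, 6)))
print(out.shape)  # (8, 8, 6)
```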
The weights serve to distinguish the features' importance between samples in a training batch with different complexity of content. The BA2M could be embedded into different parts of CNN and optimized with the network in an end-to-end manner. The design of BA2M is lightweight with few extra parameters and calculations. We validate BA2M through extensive experiments on CIFAR-100 and ImageNet-1K for the image recognition task. The results show that BA2M can boost the performance of various network architectures and outperforms many classical attention methods. Besides, BA2M exceeds traditional methods of re-weighting samples based on the loss value." + published_date: { + seconds: 1616889600 + } + authors: "Qishang Cheng" + authors: "Hongliang Li" + authors: "Qingbo Wu" + authors: "King Ngi Ngan" + } + video: { + video_id: "HWf8CmTAIR4" + video_title: "PR-334: CMT: Convolutional Neural Networks Meet Vision Transformers" + number_of_likes: 12 + number_of_views: 361 + published_date: { + seconds: 1627231618 + } + uploader: "JoonHo LEE" + } + } +} +pr_id_to_video: { + key: 335 + value: { + pr_id: 335 + papers: { + paper_id: "self-supervised-learning-for-deep-models-in" + title: "Self-supervised Learning for Large-scale Item Recommendations" + arxiv_id: "2007.12865" + abstract: "Large scale recommender models find most relevant items from huge catalogs, and they play a critical role in modern search and recommendation systems. To model the input space with large-vocab categorical features, a typical recommender model learns a joint embedding space through neural networks for both queries and items from user feedback data. However, with millions to billions of items in the corpus, users tend to provide feedback for a very small set of them, causing a power-law distribution. This makes the feedback data for long-tail items extremely sparse. Inspired by the recent success in self-supervised representation learning research in both computer vision and natural language understanding, we propose a multi-task self-supervised learning (SSL) framework for large-scale item recommendations. The framework is designed to tackle the label sparsity problem by learning better latent relationship of item features. Specifically, SSL improves item representation learning as well as serving as additional regularization to improve generalization. Furthermore, we propose a novel data augmentation method that utilizes feature correlations within the proposed framework. We evaluate our framework using two real-world datasets with 500M and 1B training examples respectively. Our results demonstrate the effectiveness of SSL regularization and show its superior performance over the state-of-the-art regularization techniques. We also have already launched the proposed techniques to a web-scale commercial app-to-app recommendation system, with significant improvements top-tier business metrics demonstrated in A/B experiments on live traffic. Our online results also verify our hypothesis that our framework indeed improves model performance even more on slices that lack supervision." + published_date: { + seconds: 1595635200 + } + authors: "Tiansheng Yao" + authors: "Xinyang Yi" + authors: "Derek Zhiyuan Cheng" + authors: "Felix Yu" + authors: "Ting Chen" + authors: "Aditya Menon" + authors: "Lichan Hong" + authors: "Ed H. 
Chi" + authors: "Steve Tjoa" + authors: "Jieqi Kang" + authors: "Evan Ettinger" + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + } + video: { + video_id: "FFXAm2uTmeI" + video_title: "PR-335: Self-supervised Learning for Large-scale Item Recommendations" + number_of_likes: 1 + number_of_views: 74 + published_date: { + seconds: 1628564230 + } + uploader: "박성남" + } + } +} +pr_id_to_video: { + key: 337 + value: { + pr_id: 337 + papers: { + paper_id: "cswin-transformer-a-general-vision" + title: "CSWin Transformer: A General Vision Transformer Backbone with Cross-Shaped Windows" + arxiv_id: "2107.00652" + abstract: "We present CSWin Transformer, an efficient and effective Transformer-based backbone for general-purpose vision tasks. A challenging issue in Transformer design is that global self-attention is very expensive to compute whereas local self-attention often limits the field of interactions of each token. To address this issue, we develop the Cross-Shaped Window self-attention mechanism for computing self-attention in the horizontal and vertical stripes in parallel that form a cross-shaped window, with each stripe obtained by splitting the input feature into stripes of equal width. We provide a detailed mathematical analysis of the effect of the stripe width and vary the stripe width for different layers of the Transformer network which achieves strong modeling capability while limiting the computation cost. We also introduce Locally-enhanced Positional Encoding (LePE), which handles the local positional information better than existing encoding schemes. LePE naturally supports arbitrary input resolutions, and is thus especially effective and friendly for downstream tasks. Incorporated with these designs and a hierarchical structure, CSWin Transformer demonstrates competitive performance on common vision tasks. Specifically, it achieves 85.4% Top-1 accuracy on ImageNet-1K without any extra training data or label, 53.9 box AP and 46.4 mask AP on the COCO detection task, and 51.7 mIOU on the ADE20K semantic segmentation task, surpassing previous state-of-the-art Swin Transformer backbone by +1.2, +2.0, +1.4, and +2.0 respectively under the similar FLOPs setting. By further pretraining on the larger dataset ImageNet-21K, we achieve 87.5% Top-1 accuracy on ImageNet-1K and state-of-the-art segmentation performance on ADE20K with 55.7 mIoU. The code and models will be available at https://github.com/microsoft/CSWin-Transformer." 
+ published_date: { + seconds: 1625097600 + } + authors: "Xiaoyi Dong" + authors: "Jianmin Bao" + authors: "Dongdong Chen" + authors: "Weiming Zhang" + authors: "Nenghai Yu" + authors: "Lu Yuan" + authors: "Dong Chen" + authors: "Baining Guo" + repositories: { + is_official: true + url: "https://github.com/microsoft/CSWin-Transformer" + owner: "microsoft" + framework: FRAMEWORK_PYTORCH + number_of_stars: 129 + description: "CSWin Transformer: A General Vision Transformer Backbone with Cross-Shaped" + } + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. \r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. 
This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + methods: { + name: "Transformer" + full_name: "Transformer" + description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." + } + methods: { + name: "Scaled Dot-Product Attention" + full_name: "Scaled Dot-Product Attention" + description: "**Scaled dot-product attention** is an attention mechanism where the dot products are scaled down by $\\sqrt{d_k}$. Formally we have a query $Q$, a key $K$ and a value $V$ and calculate the attention as:\r\n\r\n$$ {\\text{Attention}}(Q, K, V) = \\text{softmax}(\\frac{QK^{T}}{\\sqrt{d_k}})V $$\r\n\r\nIf we assume that $q$ and $k$ are $d_k$-dimensional vectors whose components are independent random variables with mean $0$ and variance $1$, then their dot product, $q \\cdot k = \\sum_{i=1}^{d_k} u_iv_i$, has mean $0$ and variance $d_k$. Since we would prefer these values to have variance $1$, we divide by $\\sqrt{d_k}$." + } + methods: { + name: "Softmax" + full_name: "Softmax" + description: "The **Softmax** output function transforms a previous layer's output into a vector of probabilities. It is commonly used for multiclass classification. Given an input vector $x$ and a weighting vector $w$ we have:\r\n\r\n$$ P(y=j \\mid{x}) = \\frac{e^{x^{T}w_{j}}}{\\sum^{K}_{k=1}e^{x^{T}w_{k}}} $$" + } + methods: { + name: "Stochastic Depth" + full_name: "Stochastic Depth" + description: "**Stochastic Depth** aims to shrink the depth of a network during training, while\r\nkeeping it unchanged during testing. This is achieved by randomly dropping entire [ResBlocks](https://paperswithcode.com/method/residual-block) during training and bypassing their transformations through skip connections. \r\n\r\nLet $b\\_{l} \\in$ {$0, 1$} denote a Bernoulli random variable, which indicates whether the $l$th ResBlock is active ($b\\_{l} = 1$) or inactive ($b\\_{l} = 0$). Further, let us denote the “survival” probability of ResBlock $l$ as $p\\_{l} = \\text{Pr}\\left(b\\_{l} = 1\\right)$. 
With this definition we can bypass the $l$th ResBlock by multiplying its function $f\\_{l}$ with $b\\_{l}$ and we extend the update rule to:\r\n\r\n$$ H\\_{l} = \\text{ReLU}\\left(b\\_{l}f\\_{l}\\left(H\\_{l-1}\\right) + \\text{id}\\left(H\\_{l-1}\\right)\\right) $$\r\n\r\nIf $b\\_{l} = 1$, this reduces to the original ResNet update and this ResBlock remains unchanged. If $b\\_{l} = 0$, the ResBlock reduces to the identity function, $H\\_{l} = \\text{id}\\left(H\\_{l-1}\\right)$." + } + methods: { + name: "Swin Transformer" + full_name: "Swin Transformer" + description: "The **Swin Transformer** is a type of Vision Transformer. It builds hierarchical feature maps by merging image patches (shown in gray) in deeper layers and has linear computation complexity to input image size due to computation of self-attention only within each local window (shown in red). It can thus serve as a general-purpose backbone for both image classification and dense recognition tasks. In contrast, previous vision Transformers produce feature maps of a single low resolution and have quadratic computation complexity to input image size due to computation of self-attention globally." + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). \r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + } + papers: { + paper_id: "mobile-former-bridging-mobilenet-and" + title: "Mobile-Former: Bridging MobileNet and Transformer" + arxiv_id: "2108.05895" + abstract: "We present Mobile-Former, a parallel design of MobileNet and Transformer with a two-way bridge in between. This structure leverages the advantage of MobileNet at local processing and transformer at global interaction. And the bridge enables bidirectional fusion of local and global features. Different with recent works on vision transformer, the transformer in Mobile-Former contains very few tokens (e.g. less than 6 tokens) that are randomly initialized, resulting in low computational cost. Combining with the proposed light-weight cross attention to model the bridge, Mobile-Former is not only computationally efficient, but also has more representation power, outperforming MobileNetV3 at low FLOP regime from 25M to 500M FLOPs on ImageNet classification. For instance, it achieves 77.9\\% top-1 accuracy at 294M FLOPs, gaining 1.3\\% over MobileNetV3 but saving 17\\% of computations. 
When transferring to object detection, Mobile-Former outperforms MobileNetV3 by 8.6 AP." + published_date: { + seconds: 1628726400 + } + authors: "Yinpeng Chen" + authors: "Xiyang Dai" + authors: "Dongdong Chen" + authors: "Mengchen Liu" + authors: "Xiaoyi Dong" + authors: "Lu Yuan" + authors: "Zicheng Liu" + methods: { + name: "Hard Swish" + full_name: "Hard Swish" + description: "**Hard Swish** is a type of activation function based on [Swish](https://paperswithcode.com/method/swish), but replaces the computationally expensive sigmoid with a piecewise linear analogue:\r\n\r\n$$\\text{h-swish}\\left(x\\right) = x\\frac{\\text{ReLU6}\\left(x+3\\right)}{6} $$" + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "ReLU6" + full_name: "ReLU6" + description: "**ReLU6** is a modification of the [rectified linear unit](https://paperswithcode.com/method/relu) where we limit the activation to a maximum size of $6$. This is due to increased robustness when used with low-precision computation.\r\n\r\nImage Credit: [PyTorch](https://pytorch.org/docs/master/generated/torch.nn.ReLU6.html)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
+ } + methods: { + name: "Label Smoothing" + full_name: "Label Smoothing" + description: "**Label Smoothing** is a regularization technique that introduces noise for the labels. This accounts for the fact that datasets may have mistakes in them, so maximizing the likelihood of $\\log{p}\\left(y\\mid{x}\\right)$ directly can be harmful. Assume for a small constant $\\epsilon$, the training set label $y$ is correct with probability $1-\\epsilon$ and incorrect otherwise. Label Smoothing regularizes a model based on a softmax with $k$ output values by replacing the hard $0$ and $1$ classification targets with targets of $\\frac{\\epsilon}{k-1}$ and $1-\\epsilon$ respectively.\r\n\r\nSource: Deep Learning, Goodfellow et al\r\n\r\nImage Source: [When Does Label Smoothing Help?](https://arxiv.org/abs/1906.02629)" + } + methods: { + name: "Transformer" + full_name: "Transformer" + description: "A **Transformer** is a model architecture that eschews recurrence and instead relies entirely on an [attention mechanism](https://paperswithcode.com/methods/category/attention-mechanisms-1) to draw global dependencies between input and output. Before Transformers, the dominant sequence transduction models were based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The Transformer also employs an encoder and decoder, but removing recurrence in favor of [attention mechanisms](https://paperswithcode.com/methods/category/attention-mechanisms-1) allows for significantly more parallelization than methods like [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and [CNNs](https://paperswithcode.com/methods/category/convolutional-neural-networks)." + } + methods: { + name: "Depthwise Separable Convolution" + full_name: "Depthwise Separable Convolution" + description: "While [standard convolution](https://paperswithcode.com/method/convolution) performs the channelwise and spatial-wise computation in one step, **Depthwise Separable Convolution** splits the computation into two steps: depthwise convolution applies a single convolutional filter per each input channel and pointwise convolution is used to create a linear combination of the output of the depthwise convolution. The comparison of standard convolution and depthwise separable convolution is shown to the right.\r\n\r\nCredit: [Depthwise Convolution Is All You Need for Learning Multiple Visual Domains](https://paperswithcode.com/paper/depthwise-convolution-is-all-you-need-for)" + } + methods: { + name: "Multi-Head Attention" + full_name: "Multi-Head Attention" + description: "**Multi-head Attention** is a module for attention mechanisms which runs through an attention mechanism several times in parallel. The independent attention outputs are then concatenated and linearly transformed into the expected dimension. Intuitively, multiple attention heads allows for attending to parts of the sequence differently (e.g. longer-term dependencies versus shorter-term dependencies). 
\r\n\r\n$$ \\text{MultiHead}\\left(\\textbf{Q}, \\textbf{K}, \\textbf{V}\\right) = \\left[\\text{head}\\_{1},\\dots,\\text{head}\\_{h}\\right]\\textbf{W}_{0}$$\r\n\r\n$$\\text{where} \\text{ head}\\_{i} = \\text{Attention} \\left(\\textbf{Q}\\textbf{W}\\_{i}^{Q}, \\textbf{K}\\textbf{W}\\_{i}^{K}, \\textbf{V}\\textbf{W}\\_{i}^{V} \\right) $$\r\n\r\nAbove $\\textbf{W}$ are all learnable parameter matrices.\r\n\r\nNote that [scaled dot-product attention](https://paperswithcode.com/method/scaled) is most commonly used in this module, although in principle it can be swapped out for other types of attention mechanism.\r\n\r\nSource: [Lilian Weng](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html#a-family-of-attention-mechanisms)" + } + methods: { + name: "Depthwise Convolution" + full_name: "Depthwise Convolution" + description: "**Depthwise Convolution** is a type of convolution where we apply a single convolutional filter for each input channel. In the regular 2D [convolution](https://paperswithcode.com/method/convolution) performed over multiple input channels, the filter is as deep as the input and lets us freely mix channels to generate each element in the output. In contrast, depthwise convolutions keep each channel separate. To summarize the steps, we:\r\n\r\n1. Split the input and filter into channels.\r\n2. We convolve each input with the respective filter.\r\n3. We stack the convolved outputs together.\r\n\r\nImage Credit: [Chi-Feng Wang](https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728)" + } + } + papers: { + paper_id: "s-2-mlpv2-improved-spatial-shift-mlp" + title: "S$^2$-MLPv2: Improved Spatial-Shift MLP Architecture for Vision" + arxiv_id: "2108.01072" + abstract: "Recently, MLP-based vision backbones emerge. MLP-based vision architectures with less inductive bias achieve competitive performance in image recognition compared with CNNs and vision Transformers. Among them, spatial-shift MLP (S$^2$-MLP), adopting the straightforward spatial-shift operation, achieves better performance than the pioneering works including MLP-mixer and ResMLP. More recently, using smaller patches with a pyramid structure, Vision Permutator (ViP) and Global Filter Network (GFNet) achieve better performance than S$^2$-MLP. In this paper, we improve the S$^2$-MLP vision backbone. We expand the feature map along the channel dimension and split the expanded feature map into several parts. We conduct different spatial-shift operations on split parts. Meanwhile, we exploit the split-attention operation to fuse these split parts. Moreover, like the counterparts, we adopt smaller-scale patches and use a pyramid structure for boosting the image recognition accuracy. We term the improved spatial-shift MLP vision backbone as S$^2$-MLPv2. Using 55M parameters, our medium-scale model, S$^2$-MLPv2-Medium achieves an $83.6\\%$ top-1 accuracy on the ImageNet-1K benchmark using $224\\times 224$ images without self-attention and external training data." + published_date: { + seconds: 1627862400 + } + authors: "Tan Yu" + authors: "Xu Li" + authors: "Yunfeng Cai" + authors: "Mingming Sun" + authors: "Ping Li" + methods: { + name: "Residual Connection" + full_name: "Residual Connection" + description: "**Residual Connections** are a type of skip-connection that learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. 
\r\n\r\nFormally, denoting the desired underlying mapping as $\\mathcal{H}({x})$, we let the stacked nonlinear layers fit another mapping of $\\mathcal{F}({x}):=\\mathcal{H}({x})-{x}$. The original mapping is recast into $\\mathcal{F}({x})+{x}$.\r\n\r\nThe intuition is that it is easier to optimize the residual mapping than to optimize the original, unreferenced mapping. To the extreme, if an identity mapping were optimal, it would be easier to push the residual to zero than to fit an identity mapping by a stack of nonlinear layers." + } + methods: { + name: "GELU" + full_name: "Gaussian Error Linear Units" + description: "The **Gaussian Error Linear Unit**, or **GELU**, is an activation function. The GELU activation function is $x\\Phi(x)$, where $\\Phi(x)$ the standard Gaussian cumulative distribution function. The GELU nonlinearity weights inputs by their percentile, rather than gates inputs by their sign as in [ReLUs](https://paperswithcode.com/method/relu) ($x\\mathbf{1}_{x>0}$). Consequently the GELU can be thought of as a smoother ReLU.\r\n\r\n$$\\text{GELU}\\left(x\\right) = x{P}\\left(X\\leq{x}\\right) = x\\Phi\\left(x\\right) = x \\cdot \\frac{1}{2}\\left[1 + \\text{erf}(x/\\sqrt{2})\\right],$$\r\nif $X\\sim \\mathcal{N}(0,1)$.\r\n\r\nOne can approximate the GELU with\r\n$0.5x\\left(1+\\tanh\\left[\\sqrt{2/\\pi}\\left(x + 0.044715x^{3}\\right)\\right]\\right)$ or $x\\sigma\\left(1.702x\\right),$\r\nbut PyTorch's exact implementation is sufficiently fast such that these approximations may be unnecessary. (See also the [SiLU](https://paperswithcode.com/method/silu) $x\\sigma(x)$ which was also coined in the paper that introduced the GELU.)\r\n\r\nGELUs are used in GPT-3, BERT, and most other Transformers." + } + methods: { + name: "Layer Normalization" + full_name: "Layer Normalization" + description: "Unlike [batch normalization](https://paperswithcode.com/method/batch-normalization), **Layer Normalization** directly estimates the normalization statistics from the summed inputs to the neurons within a hidden layer so the normalization does not introduce any new dependencies between training cases. It works well for [RNNs](https://paperswithcode.com/methods/category/recurrent-neural-networks) and improves both the training time and the generalization performance of several existing RNN models. More recently, it has been used with [Transformer](https://paperswithcode.com/methods/category/transformers) models.\r\n\r\nWe compute the layer normalization statistics over all the hidden units in the same layer as follows:\r\n\r\n$$ \\mu^{l} = \\frac{1}{H}\\sum^{H}\\_{i=1}a\\_{i}^{l} $$\r\n\r\n$$ \\sigma^{l} = \\sqrt{\\frac{1}{H}\\sum^{H}\\_{i=1}\\left(a\\_{i}^{l}-\\mu^{l}\\right)^{2}} $$\r\n\r\nwhere $H$ denotes the number of hidden units in a layer. Under layer normalization, all the hidden units in a layer share the same normalization terms $\\mu$ and $\\sigma$, but different training cases have different normalization terms. Unlike batch normalization, layer normalization does not impose any constraint on the size of the mini-batch and it can be used in the pure online regime with batch size 1." + } + methods: { + name: "Average Pooling" + full_name: "Average Pooling" + description: "**Average Pooling** is a pooling operation that calculates the average value for patches of a feature map, and uses it to create a downsampled (pooled) feature map. It is usually used after a convolutional layer. 
It adds a small amount of translation invariance - meaning translating the image by a small amount does not significantly affect the values of most pooled outputs. It extracts features more smoothly than [Max Pooling](https://paperswithcode.com/method/max-pooling), whereas max pooling extracts more pronounced features like edges.\r\n\r\nImage Source: [here](https://www.researchgate.net/figure/Illustration-of-Max-Pooling-and-Average-Pooling-Figure-2-above-shows-an-example-of-max_fig2_333593451)" + } + methods: { + name: "Dense Connections" + full_name: "Dense Connections" + description: "**Dense Connections**, or **Fully Connected Connections**, are a type of layer in a deep neural network that use a linear operation where every input is connected to every output by a weight. This means there are $n\\_{\\text{inputs}}*n\\_{\\text{outputs}}$ parameters, which can lead to a lot of parameters for a sizeable network.\r\n\r\n$$h\\_{l} = g\\left(\\textbf{W}^{T}h\\_{l-1}\\right)$$\r\n\r\nwhere $g$ is an activation function.\r\n\r\nImage Source: Deep Learning by Goodfellow, Bengio and Courville" + } + methods: { + name: "MLP-Mixer" + full_name: "MLP-Mixer" + description: "The **MLP-Mixer** architecture (or “Mixer” for short) is an image architecture that doesn't use convolutions or self-attention. Instead, Mixer’s architecture is based entirely on multi-layer perceptrons (MLPs) that are repeatedly applied across either spatial locations or feature channels. Mixer relies only on basic matrix multiplication routines, changes to data layout (reshapes and transpositions), and scalar nonlinearities.\r\n\r\nIt accepts a sequence of linearly projected image patches (also referred to as tokens) shaped as a “patches × channels” table as an input, and maintains this dimensionality. Mixer makes use of two types of MLP layers: channel-mixing MLPs and token-mixing MLPs. The channel-mixing MLPs allow communication between different channels; they operate on each token independently and take individual rows of the table as inputs. The token-mixing MLPs allow communication between different spatial locations (tokens); they operate on each channel independently and take individual columns of the table as inputs. These two types of layers are interleaved to enable interaction of both input dimensions." + } + methods: { + name: "Global Average Pooling" + full_name: "Global Average Pooling" + description: "**Global Average Pooling** is a pooling operation designed to replace fully connected layers in classical CNNs. The idea is to generate one feature map for each corresponding category of the classification task in the last mlpconv layer. Instead of adding fully connected layers on top of the feature maps, we take the average of each feature map, and the resulting vector is fed directly into the softmax layer. \r\n\r\nOne advantage of global average pooling over the fully connected layers is that it is more native to the convolution structure by enforcing correspondences between feature maps and categories. Thus the feature maps can be easily interpreted as categories confidence maps. Another advantage is that there is no parameter to optimize in the global average pooling thus overfitting is avoided at this layer. Furthermore, global average pooling sums out the spatial information, thus it is more robust to spatial translations of the input." 
+ } + methods: { + name: "Dropout" + full_name: "Dropout" + description: "**Dropout** is a regularization technique for neural networks that drops a unit (along with connections) at training time with a specified probability $p$ (a common value is $p=0.5$). At test time, all units are present, but with weights scaled by $p$ (i.e. $w$ becomes $pw$).\r\n\r\nThe idea is to prevent co-adaptation, where the neural network becomes too reliant on particular connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks." + } + } + video: { + video_id: "20kxrS2yglg" + video_title: "PR-337: Multi-Scale Features in Transformers (Swin and CSWin Transformers)" + number_of_views: 82 + published_date: { + seconds: 1628436838 + } + uploader: "Byung-Hak Kim" + } + } +} +pr_id_to_video: { + key: 338 + value: { + pr_id: 338 + papers: { + paper_id: "alias-free-generative-adversarial-networks" + title: "Alias-Free Generative Adversarial Networks" + arxiv_id: "2106.12423" + abstract: "We observe that despite their hierarchical convolutional nature, the synthesis process of typical generative adversarial networks depends on absolute pixel coordinates in an unhealthy manner. This manifests itself as, e.g., detail appearing to be glued to image coordinates instead of the surfaces of depicted objects. We trace the root cause to careless signal processing that causes aliasing in the generator network. Interpreting all signals in the network as continuous, we derive generally applicable, small architectural changes that guarantee that unwanted information cannot leak into the hierarchical synthesis process. The resulting networks match the FID of StyleGAN2 but differ dramatically in their internal representations, and they are fully equivariant to translation and rotation even at subpixel scales. Our results pave the way for generative models better suited for video and animation." + published_date: { + seconds: 1624406400 + } + authors: "Tero Karras" + authors: "Miika Aittala" + authors: "Samuli Laine" + authors: "Erik Härkönen" + authors: "Janne Hellsten" + authors: "Jaakko Lehtinen" + authors: "Timo Aila" + repositories: { + url: "https://github.com/duskvirkus/alias-free-gan" + owner: "duskvirkus" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "Incomplete! PyTorch Lightning Alias-Free GAN based on rosinality's unofficial implementation. More extensive training options and TPU support." + } + repositories: { + url: "https://github.com/duskvirkus/alias-free-gan-pytorch-lightning" + owner: "duskvirkus" + framework: FRAMEWORK_PYTORCH + number_of_stars: 14 + description: "Incomplete! PyTorch Lightning Alias-Free GAN based on rosinality's unofficial implementation. More extensive training options and TPU support." + } + repositories: { + url: "https://github.com/jychoi118/toward_spatial_unbiased" + owner: "jychoi118" + framework: FRAMEWORK_PYTORCH + number_of_stars: 56 + description: "Toward Spatially Unbiased Generative Models (ICCV 2021)" + } + repositories: { + url: "https://github.com/rosinality/alias-free-gan-pytorch" + owner: "rosinality" + framework: FRAMEWORK_PYTORCH + number_of_stars: 385 + description: "Unofficial implementation of Alias-Free Generative Adversarial Networks. 
(https://arxiv.org/abs/2106.12423) in PyTorch" + } + methods: { + name: "Convolution" + full_name: "Convolution" + description: "A **convolution** is a type of matrix operation, consisting of a kernel, a small matrix of weights, that slides over input data performing element-wise multiplication with the part of the input it is on, then summing the results into an output.\r\n\r\nIntuitively, a convolution allows for weight sharing - reducing the number of effective parameters - and image translation (allowing for the same feature to be detected in different parts of the input space).\r\n\r\nImage Source: [https://arxiv.org/pdf/1603.07285.pdf](https://arxiv.org/pdf/1603.07285.pdf)" + } + methods: { + name: "StyleGAN2" + full_name: "StyleGAN2" + description: "**StyleGAN2** is a generative adversarial network that builds on [StyleGAN](https://paperswithcode.com/method/stylegan) with several improvements. First, adaptive instance normalization is redesigned and replaced with a normalization technique called weight demodulation. Secondly, an improved training scheme upon progressively growing is introduced, which achieves the same goal - training starts by focusing on low-resolution images and then progressively shifts focus to higher and higher resolutions - without changing the network topology during training. Additionally, new types of regularization like lazy regularization and path length regularization are proposed." + } + methods: { + name: "Weight Demodulation" + full_name: "Weight Demodulation" + description: "**Weight Modulation** is an alternative to [adaptive instance normalization](https://paperswithcode.com/method/adaptive-instance-normalization) for use in generative adversarial networks, specifically it is introduced in [StyleGAN2](https://paperswithcode.com/method/stylegan2). The purpose of instance normalization is to remove the effect of $s$ - the scales of the features maps - from the statistics of the convolution’s output feature maps. Weight modulation tries to achieve this goal more directly. Assuming that input activations are i.i.d. random variables with unit standard deviation. After modulation and convolution, the output activations have standard deviation of:\r\n\r\n$$ \\sigma\\_{j} = \\sqrt{{\\sum\\_{i,k}w\\_{ijk}'}^{2}} $$\r\n\r\ni.e., the outputs are scaled by the $L\\_{2}$ norm of the corresponding weights. The subsequent normalization aims to restore the outputs back to unit standard deviation. This can be achieved if we scale (“demodulate”) each output feature map $j$ by $1/\\sigma\\_{j}$ . Alternatively, we can again bake this into the convolution weights:\r\n\r\n$$ w''\\_{ijk} = w'\\_{ijk} / \\sqrt{{\\sum\\_{i, k}w'\\_{ijk}}^{2} + \\epsilon} $$\r\n\r\nwhere $\\epsilon$ is a small constant to avoid numerical issues." + } + methods: { + name: "Path Length Regularization" + full_name: "Path Length Regularization" + description: "**Path Length Regularization** is a type of regularization for [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks) that encourages good conditioning in the mapping from latent codes to images. The idea is to encourage that a fixed-size step in the latent space $\\mathcal{W}$ results in a non-zero, fixed-magnitude change in the image.\r\n\r\nWe can measure the deviation from this ideal empirically by stepping into random directions in the image space and observing the corresponding $\\mathbf{w}$ gradients. 
These gradients should have close to an equal length regardless of $\\mathbf{w}$ or the image-space direction, indicating that the mapping from the latent space to image space is well-conditioned.\r\n\r\nAt a single $\\mathbf{w} \\in \\mathcal{W}$ the local metric scaling properties of the generator mapping $g\\left(\\mathbf{w}\\right) : \\mathcal{W} \\rightarrow \\mathcal{Y}$ are captured by the Jacobian matrix $\\mathbf{J\\_{w}} = \\delta{g}\\left(\\mathbf{w}\\right)/\\delta{\\mathbf{w}}$. Motivated by the desire to preserve the expected lengths of vectors regardless of the direction, we formulate the regularizer as:\r\n\r\n$$ \\mathbb{E}\\_{\\mathbf{w},\\mathbf{y} \\sim \\mathcal{N}\\left(0, \\mathbf{I}\\right)} \\left(||\\mathbf{J}^{\\mathbf{T}}\\_{\\mathbf{w}}\\mathbf{y}||\\_{2} - a\\right)^{2} $$\r\n\r\nwhere $y$ are random images with normally distributed pixel intensities, and $w \\sim f\\left(z\\right)$, where $z$ are normally distributed. \r\n\r\nTo avoid explicit computation of the Jacobian matrix, we use the identity $\\mathbf{J}^{\\mathbf{T}}\\_{\\mathbf{w}}\\mathbf{y} = \\nabla\\_{\\mathbf{w}}\\left(g\\left(\\mathbf{w}\\right)·y\\right)$, which is efficiently computable using standard backpropagation. The constant $a$ is set dynamically during optimization as the long-running exponential moving average of the lengths $||\\mathbf{J}^{\\mathbf{T}}\\_{\\mathbf{w}}\\mathbf{y}||\\_{2}$, allowing the optimization to find a suitable global scale by itself.\r\n\r\nThe authors note that they find that path length regularization leads to more reliable and consistently behaving models, making architecture exploration easier. They also observe that the smoother generator is significantly easier to invert." + } + methods: { + name: "Leaky ReLU" + full_name: "Leaky ReLU" + description: "**Leaky Rectified Linear Unit**, or **Leaky ReLU**, is a type of activation function based on a [ReLU](https://paperswithcode.com/method/relu), but it has a small slope for negative values instead of a flat slope. The slope coefficient is determined before training, i.e. it is not learnt during training. This type of activation function is popular in tasks where we may suffer from sparse gradients, for example training generative adversarial networks." + } + methods: { + name: "R1 Regularization" + full_name: "R1 Regularization" + description: "**R$\\_{1}$ Regularization** is a regularization technique and gradient penalty for training [generative adversarial networks](https://paperswithcode.com/methods/category/generative-adversarial-networks). It penalizes the discriminator from deviating from the Nash Equilibrium via penalizing the gradient on real data alone: when the generator distribution produces the true data distribution and the discriminator is equal to 0 on the data manifold, the gradient penalty ensures that the discriminator cannot create a non-zero gradient orthogonal to the data manifold without suffering a loss in the GAN game.\r\n\r\nThis leads to the following regularization term:\r\n\r\n$$ R\\_{1}\\left(\\psi\\right) = \\frac{\\gamma}{2}E\\_{p\\_{D}\\left(x\\right)}\\left[||\\nabla{D\\_{\\psi}\\left(x\\right)}||^{2}\\right] $$" + } + } + papers: { + paper_id: "interpreting-generative-adversarial-networks" + title: "Interpreting Generative Adversarial Networks for Interactive Image Generation" + arxiv_id: "2108.04896" + abstract: "Great progress has been made by the advances in Generative Adversarial Networks (GANs) for image generation. 
However, there lacks enough understanding on how a realistic image can be generated by the deep representations of GANs from a random vector. This chapter will give a summary of recent works on interpreting deep generative models. We will see how the human-understandable concepts that emerge in the learned representation can be identified and used for interactive image generation and editing." + published_date: { + seconds: 1628553600 + } + authors: "Bolei Zhou" + } + video: { + video_id: "BZwUR9hvBPE" + video_title: "PR-338 Alias-Free Generative Adversarial Networks" + number_of_likes: 11 + number_of_views: 350 + published_date: { + seconds: 1628467702 + } + uploader: "Jaejun Yoo" + } + } +} +pr_id_to_video: { + key: 339 + value: { + pr_id: 339 + papers: { + paper_id: "maintaining-discrimination-and-fairness-in" + title: "Maintaining Discrimination and Fairness in Class Incremental Learning" + arxiv_id: "1911.07053" + abstract: "Deep neural networks (DNNs) have been applied in class incremental learning, which aims to solve common real-world problems of learning new classes continually. One drawback of standard DNNs is that they are prone to catastrophic forgetting. Knowledge distillation (KD) is a commonly used technique to alleviate this problem. In this paper, we demonstrate it can indeed help the model to output more discriminative results within old classes. However, it cannot alleviate the problem that the model tends to classify objects into new classes, causing the positive effect of KD to be hidden and limited. We observed that an important factor causing catastrophic forgetting is that the weights in the last fully connected (FC) layer are highly biased in class incremental learning. In this paper, we propose a simple and effective solution motivated by the aforementioned observations to address catastrophic forgetting. Firstly, we utilize KD to maintain the discrimination within old classes. Then, to further maintain the fairness between old classes and new classes, we propose Weight Aligning (WA) that corrects the biased weights in the FC layer after normal training process. Unlike previous work, WA does not require any extra parameters or a validation set in advance, as it utilizes the information provided by the biased weights themselves. The proposed method is evaluated on ImageNet-1000, ImageNet-100, and CIFAR-100 under various settings. Experimental results show that the proposed method can effectively alleviate catastrophic forgetting and significantly outperform state-of-the-art methods." + published_date: { + seconds: 1573862400 + } + authors: "Bowen Zhao" + authors: "Xi Xiao" + authors: "Guojun Gan" + authors: "Bin Zhang" + authors: "Shutao Xia" + repositories: { + url: "https://github.com/hugoycj/Incremental-Learning-with-Weight-Aligning" + owner: "hugoycj" + framework: FRAMEWORK_PYTORCH + number_of_stars: 21 + description: "Pytorch implementation of Maintaining Discrimination and Fairness in Class Incremental Learning" + } + } + papers: { + paper_id: "a-comprehensive-study-of-class-incremental" + title: "A Comprehensive Study of Class Incremental Learning Algorithms for Visual Tasks" + arxiv_id: "2011.01844" + abstract: "The ability of artificial agents to increment their capabilities when confronted with new data is an open challenge in artificial intelligence. The main challenge faced in such cases is catastrophic forgetting, i.e., the tendency of neural networks to underfit past data when new ones are ingested. 
A first group of approaches tackles forgetting by increasing deep model capacity to accommodate new knowledge. A second type of approaches fix the deep model size and introduce a mechanism whose objective is to ensure a good compromise between stability and plasticity of the model. While the first type of algorithms were compared thoroughly, this is not the case for methods which exploit a fixed size model. Here, we focus on the latter, place them in a common conceptual and experimental framework and propose the following contributions: (1) define six desirable properties of incremental learning algorithms and analyze them according to these properties, (2) introduce a unified formalization of the class-incremental learning problem, (3) propose a common evaluation framework which is more thorough than existing ones in terms of number of datasets, size of datasets, size of bounded memory and number of incremental states, (4) investigate the usefulness of herding for past exemplars selection, (5) provide experimental evidence that it is possible to obtain competitive performance without the use of knowledge distillation to tackle catastrophic forgetting and (6) facilitate reproducibility by integrating all tested methods in a common open-source repository. The main experimental finding is that none of the existing algorithms achieves the best results in all evaluated settings. Important differences arise notably if a bounded memory of past classes is allowed or not." + published_date: { + seconds: 1604361600 + } + authors: "Eden Belouadah" + authors: "Adrian Popescu" + authors: "Ioannis Kanellos" + repositories: { + is_official: true + url: "https://github.com/EdenBelouadah/class-incremental-learning" + owner: "EdenBelouadah" + framework: FRAMEWORK_PYTORCH + number_of_stars: 103 + } + } + video: { + video_id: "hptinxZIXT4" + video_title: "PR-339: Maintaining discrimination and fairness in class incremental learning" + number_of_likes: 1 + number_of_views: 38 + published_date: { + seconds: 1629123504 + } + uploader: "Sunghoon Joo" + } + } +} diff --git a/server/internal/data/mapping_table.pbtxt b/server/internal/data/mapping_table.pbtxt index fb450c4e..b082ae2c 100644 --- a/server/internal/data/mapping_table.pbtxt +++ b/server/internal/data/mapping_table.pbtxt @@ -2458,3 +2458,47 @@ rows: { paper_arxiv_ids: "2106.07998" youtube_video_id: "rI-vJuNKyIU" } +rows: { + pr_id: 332 + paper_arxiv_ids: "2007.08929" + paper_arxiv_ids: "1802.02679" + paper_arxiv_ids: "2011.13529" + youtube_video_id: "-5fFL68d7Gg" +} +rows: { + pr_id: 333 + paper_arxiv_ids: "2005.00743" + paper_arxiv_ids: "2106.03714" + youtube_video_id: "I9kQwMbpxuE" +} +rows: { + pr_id: 334 + paper_arxiv_ids: "2107.06263" + paper_arxiv_ids: "2103.15808" + paper_arxiv_ids: "2103.15099" + youtube_video_id: "HWf8CmTAIR4" +} +rows: { + pr_id: 335 + paper_arxiv_ids: "2007.12865" + youtube_video_id: "FFXAm2uTmeI" +} +rows: { + pr_id: 337 + paper_arxiv_ids: "2107.00652" + paper_arxiv_ids: "2108.05895" + paper_arxiv_ids: "2108.01072" + youtube_video_id: "20kxrS2yglg" +} +rows: { + pr_id: 338 + paper_arxiv_ids: "2106.12423" + paper_arxiv_ids: "2108.04896" + youtube_video_id: "BZwUR9hvBPE" +} +rows: { + pr_id: 339 + paper_arxiv_ids: "1911.07053" + paper_arxiv_ids: "2011.01844" + youtube_video_id: "hptinxZIXT4" +}
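For anyone sanity-checking the rows appended above, here is a minimal Go sketch that parses `server/internal/data/mapping_table.pbtxt` back into a message and flags duplicate entries. It is illustrative only: the `pb` import path is hypothetical, and the `MappingTable` message name plus the generated getters are assumptions based on the `rows`, `pr_id`, `paper_arxiv_ids`, and `youtube_video_id` fields seen in the data.

```go
// Sketch only: verify that the appended rows still parse as one mapping table.
package main

import (
	"fmt"
	"log"
	"os"

	"google.golang.org/protobuf/encoding/prototext"

	pb "github.com/example/pr12er/pkg/database" // hypothetical import path for the generated package
)

func main() {
	raw, err := os.ReadFile("server/internal/data/mapping_table.pbtxt")
	if err != nil {
		log.Fatalf("read mapping table: %v", err)
	}

	var table pb.MappingTable // assumed name of the top-level message
	if err := prototext.Unmarshal(raw, &table); err != nil {
		log.Fatalf("parse mapping table: %v", err)
	}

	// Flag duplicate pr_ids, which an append-only workflow can introduce.
	seen := map[int32]bool{}
	for _, row := range table.GetRows() {
		if seen[row.GetPrId()] {
			fmt.Printf("duplicate row for PR-%d\n", row.GetPrId())
		}
		seen[row.GetPrId()] = true
		fmt.Printf("PR-%d: video=%s papers=%v\n",
			row.GetPrId(), row.GetYoutubeVideoId(), row.GetPaperArxivIds())
	}
}
```

Since new rows are appended to the end of the file, re-adding a PR that is already present would silently create a duplicate; a check like this catches that before the server loads the table.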