import React from "react";
import "../../styles/ProjectDetails.scss";
// import { Carousel } from "react-responsive-carousel";
import "react-responsive-carousel/lib/styles/carousel.min.css";
// import { Link } from "react-router-dom";

const IngreRecipeGen = () => {
  return (
    <div className="project-details-container">
      {/* Banner Section */}
      <div className="project-image">
        <img
          src={"/assets/projects/IngreRecipeGen/Landing.png"}
          alt={"Ingredient Recipe Generator"}
          className="project-image"
        />
      </div>

      {/* Overview Section */}
      <div className="section model-training">
        <h2>Overview</h2>
        <p>
          This project is simply a Sequence-to-Sequence modelling task, mapping
          ingredients to a recipe using RNN techniques. It consists of 4 custom
          RNN training models:
          <li>
            <strong>Baseline 1:</strong> Sequence-to-sequence RNN
            <img
              src={"/assets/projects/IngreRecipeGen/Baseline1.png"}
              alt={"Baseline model 1"}
              className="desc-image"
            />
          </li>
          <li>
            <strong>Baseline 2:</strong> Sequence-to-sequence RNN with attention
            <img
              src={"/assets/projects/IngreRecipeGen/Baseline2.png"}
              alt={"Baseline model 2"}
              className="desc-image"
            />
          </li>
          <li>
            <strong>Extension 1: </strong>
            Sequence-to-sequence RNN with attention and data-preprocessing
            <img
              src={"/assets/projects/IngreRecipeGen/Extension1.png"}
              alt={"Extension model 1"}
              className="desc-image"
            />
          </li>
          <li>
            <strong>Extension 2: </strong>Sequence-to-sequence RNN with
            attention, data-preprocessing, pretrained word2vec embedding and
            advanced RNN technique with reference to the paper of{" "}
            <i>
              Ximing Lu, Peter West, Rowan Zellers, Ronan Le Bras, Chandra
              Bhagavatula, Yejin Choi. NeuroLogic Decoding: (Un)supervised
              Neural Text Generation with Predicate Logic Constraints. In
              Proceedings of the 2021 Conference of the North American Chapter
              of the Association for Computational Linguistics
            </i>
          </li>
        </p>
        <img
          src={"/assets/projects/IngreRecipeGen/Extension2.png"}
          alt={"Extension model 2"}
          className="desc-image"
        />
        <img
          src={"/assets/projects/IngreRecipeGen/Paper.png"}
          alt={"Detailed descriptions from the reference chosen"}
          className="desc-image"
        />
      </div>

      {/* Github Link Section */}
      <div className="section github-link">
        <h2>Github Link</h2>
        <ul>
          <li>
            <strong>
              GitHub{" "}
              <i className="fab fa-github" style={{ marginRight: "5px" }}></i>:{" "}
            </strong>{" "}
            <a
              href="https://github.com/YiJieNG/NLP---Ingredient-Recipe-Generator"
              target="_blank"
              rel="noopener noreferrer"
            >
              github.com/YiJieNG/NLP---Ingredient-Recipe-Generator
            </a>
          </li>
        </ul>
      </div>

      {/* Challenges */}
      <div className="section challenges">
        <h2>Challenges</h2>
        <p>
          {" "}
          This project showcases a self-built sequence-to-sequence RNN model,
          emphasizing <strong>technique comparison</strong> rather than perfect
          performance. The evaluation faced challenges due to{" "}
          <strong>limited resources</strong> in hardware, time, data quality,
          and completeness. Consequently, the primary goal was to{" "}
          <strong>compare methodologies</strong>, not to generate an ideal
          recipe from the provided ingredients.{" "}
        </p>
      </div>

      {/*Model & Training Configurations Section */}
      <div className="section model-training-config">
        <h2>Model & Training Configurations</h2>
        <img
          src={"/assets/projects/IngreRecipeGen/model-training-config.png"}
          alt={"Model & Training Configurations"}
          className="desc-image"
        />
      </div>

      {/*Data Statistic Section */}
      <div className="section model-training-config">
        <h2>Data Statistic</h2>
        <img
          src={"/assets/projects/IngreRecipeGen/DataStatistic.png"}
          alt={"Data Statistic"}
          className="desc-image"
        />
      </div>

      {/*Data Preprocessing Section */}
      <div className="section data-preprocessing">
        <h2>Data Preprocessing</h2>
        <p>
          For every model, I have removed any data having no value (either
          ingredients or recipe or both), then convert the cell value into
          string.
        </p>
        <p>For extension 1 and 2 model with data preprocessing:</p>
        <p>
          <i>
            <strong>1. For Ingredients:</strong>
          </i>
          <li>
            Lower down every character and transform them from Unicode to ascii.
          </li>
          <li>Substitute the stopwords with space.</li>
          <li>
            Remove character other than a to z and space notation such as \t
          </li>
          <li>Separate the left valid ingredients (using \t).</li>
          <li>Remove the extra space.</li>
          <li>
            Make the repeated ingredients to a unique ingredient to avoid
            repeated ingredients
          </li>
          <li>Concatenate the unique ingredients into a long string.</li>
        </p>
        <p>For extension 1 and 2 model with data preprocessing:</p>
        <p>
          <i>
            <strong>2. For Recipe:</strong>
          </i>
          <li>
            The reason why ingredients have extra preprocessing step compared to
            recipe is because recipe is considered as ground truth label, and
            the expected output of model should be a valid and complete sentence
            of a recipe. However, we only need a certain information for
            ingredient thus more preprocessing step is introduced for
            ingredients compared to recipe.
          </li>
        </p>
      </div>

      {/*Evaluation Section */}
      <div className="section data-preprocessing">
        <h2>Evaluation</h2>
        <p>
          <strong>1. Train and Valid loss:</strong>
        </p>
        <p>
          <img
            src={"/assets/projects/IngreRecipeGen/TrainValidLoss.png"}
            alt={"Train and Valid loss for each model"}
            className="desc-image"
          />
        </p>
        <p>
          <strong>2. Test loss:</strong>
        </p>
        <p>
          <img
            src={"/assets/projects/IngreRecipeGen/TestLoss.png"}
            alt={"Test loss for each model"}
            className="desc-image"
          />
        </p>
        <p>
          <strong>3. Justification:</strong>
        </p>

        <p>
          Generally, we use <strong>train loss</strong> to fit and let the model
          <strong>learn to minimize the loss</strong>. The training loss for all
          models keep decreasing and slowly converged. The purpose of{" "}
          <strong>valid loss is to evaluate the performance</strong> of the
          models. We then <strong>modify the hyperparameter</strong> and
          manually trying to minimize the valid loss. Finally,{" "}
          <strong>test loss</strong> is used for evaluating the{" "}
          <strong>performance of trained model</strong> with defined
          hyperparameter. Based on the plotting and the test loss evaluated, we
          can conclude that{" "}
          <strong>
            Baseline 2 model outperformed all of the models, followed by
            Extension 1 and 2 model
          </strong>
          . The <strong>worst model</strong> that computes the highest test loss
          goes to <strong>Baseline 1</strong>.
        </p>
        <p>
          As we can see from the graph for{" "}
          <strong>best model of Baseline 2</strong>, the{" "}
          <strong>valid loss has the most obvious decreasing trend</strong>.
          This is because since baseline 2 has no text preprocessing but with
          attention mechanism, there are still useful information or
          relationship available for model to capture. For next two models of
          <strong>Extension 1 and 2</strong>, they show a{" "}
          <strong>non-obvious decreasing trend</strong> for valid loss which
          means they also learning something but not much in each iteration.
          Hence their test losses are very close to each other. The reason
          behind is because text-preprocessing removed a lot of redundant words
          which allows the model to learn more faster and converge.
        </p>
        <p>
          For the worst model of <strong>Baseline 1</strong>, the{" "}
          <strong>valid loss trends are unstable</strong>, and most of the time
          is increasing. This is where <strong>overfitting</strong> occurs where
          the train loss is decreasing but the valid loss is increasing.
          Baseline 1 overfits earlier than other models which cause the worst
          test performance.
        </p>
      </div>

      {/* Footer Section */}
      <div className="footer">
        <p>&copy; Ng Yi Jie</p>
      </div>
    </div>
  );
};

// Default export: the component is this module's single public binding.
export default IngreRecipeGen;
