wys的个人博客

你有很多事放不下?做人要潇洒一点~

0%

6.828 不存在riscv64-linux-gnu-gdb

上课的时候发现老师使用的是 riscv64-linux-gnu-gdb 但实际操作发现不存在这个程序。

参照这个网页发现,Arch Linux 装的是 riscv64-linux-gnu-gdb

Debian or Ubuntu 装的是 gdb-multiarch

启动gdb-multiarch 发现没有装载文件。

参照这个网页 发现需要远程连接GDB服务器。

服务端

1
2
cd xv6-labs-2023
make qemu-gdb

客户端:

1
2
cd xv6-labs-2023
gdb-multiarch

然后在客户端的 GDB 中输入 target remote localhost:26000 连接服务端(端口号以 make qemu-gdb 输出的提示为准)。

cs61c_lab10

Part1: Multi-threading programming using OpenMP

Exercise 2 - Vector Addition

方法1如下:

1
2
3
4
5
6
7
8
9
/*
 * Element-wise vector addition z = x + y over ARRAY_SIZE entries.
 *
 * The indices are interleaved across the OpenMP team: the thread whose id is
 * t handles t, t + T, t + 2T, ... where T is the number of threads in the team.
 */
void v_add_optimized_adjacent(double* x, double* y, double* z) {
    #pragma omp parallel
    {
        const int stride = omp_get_num_threads();
        int idx = omp_get_thread_num();
        while (idx < ARRAY_SIZE) {
            z[idx] = x[idx] + y[idx];
            idx += stride;
        }
    }
}

方法二如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Element-wise vector addition z = x + y over ARRAY_SIZE entries.
 *
 * Each thread of the OpenMP team handles one contiguous chunk of
 * ARRAY_SIZE / T indices (T = team size). The last thread additionally takes
 * whatever is left over when ARRAY_SIZE is not a multiple of T.
 */
void v_add_optimized_chunks(double* x, double* y, double* z) {
    #pragma omp parallel
    {
        const int team = omp_get_num_threads();
        const int me = omp_get_thread_num();
        const int first = ARRAY_SIZE / team * me;
        /* Only the final thread runs to the very end of the array. */
        const int last = (me == team - 1) ? ARRAY_SIZE
                                          : ARRAY_SIZE / team * (me + 1);
        for (int k = first; k < last; k++) {
            z[k] = x[k] + y[k];
        }
    }
}

Exercise 3 - Dot Product

不使用 reduction 的代码如下。不太清楚题目想让我们做什么,这部分参考了 1,感觉还是不太符合题意:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Dot product of x and y over the first arr_size elements, parallelised with
 * OpenMP without using a reduction clause.
 *
 * Each thread accumulates a private partial sum over its share of the loop
 * iterations and then adds that partial sum to the shared total exactly once,
 * inside a critical section.
 *
 * Compared with a per-thread heap array this needs no allocation, does not
 * depend on two separate parallel regions having the same team size, and has
 * no unsynchronised writes to shared variables. When the pragmas are ignored
 * (compiled without OpenMP) the function degrades to a plain sequential loop.
 *
 * x, y:      input arrays; at least arr_size elements are read from each.
 * arr_size:  number of elements to multiply; values <= 0 yield 0.0.
 * returns:   sum of x[i] * y[i] for 0 <= i < arr_size. The order in which the
 *            per-thread partial sums are combined is not fixed, so the last
 *            bits of the result may vary between runs for general
 *            floating-point input.
 */
double dotp_manual_optimized(double* x, double* y, int arr_size) {
    double global_sum = 0.0;
    #pragma omp parallel
    {
        /* Private to each thread: declared inside the parallel region. */
        double local_sum = 0.0;

        #pragma omp for
        for (int i = 0; i < arr_size; i++) {
            local_sum += x[i] * y[i];
        }

        /* One guarded update per thread, not one per element. */
        #pragma omp critical
        global_sum += local_sum;
    }
    return global_sum;
}

下面是使用了reduction的代码:

1
2
3
4
5
6
7
8
9
10
/*
 * Dot product of x and y over the first arr_size elements, using an OpenMP
 * sum reduction: every thread accumulates into its own copy of the total and
 * the copies are combined with + when the loop finishes.
 *
 * Returns 0.0 when arr_size is not positive.
 */
double dotp_reduction_optimized(double* x, double* y, int arr_size) {
    double acc = 0.0;
    #pragma omp parallel for reduction(+:acc)
    for (int k = 0; k < arr_size; ++k) {
        acc += x[k] * y[k];
    }
    return acc;
}

Part 2: Intro to multi-processing programming

这题直接使用 gcc-11 编译时貌似会报错(应该是链接时的变量重复定义错误:GCC 10 起默认启用 -fno-common,头文件里定义的全局变量被多个 .c 文件包含后会冲突)。可以改用 gcc-9;或者把变量的定义放进 server_utils.c,然后给 server_utils.h 中对应的变量声明加上 extern。

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Accept loop: wait for a client, log the peer address, then serve the
 * request with dispatch().
 *
 * With PROC defined, each connection is served by a forked child so that the
 * parent can return to accept() immediately; otherwise the request is served
 * inline in this process.
 *
 * NOTE(review): exited children are not waited for in this loop — confirm
 * that SIGCHLD is handled (or ignored) elsewhere so they do not pile up.
 */
while (1) {
    client_socket_number = accept(*socket_number,
                                  (struct sockaddr *) &client_address,
                                  (socklen_t *) &client_address_length);
    if (client_socket_number < 0) {
        perror("Error accepting socket");
        continue;
    }

    /* sin_port is kept in network byte order; convert it before printing. */
    printf("Accepted connection from %s on port %d\n",
           inet_ntoa(client_address.sin_addr),
           ntohs(client_address.sin_port));

    pid_t parent_pid = getpid();
#ifdef PROC
    pid_t child_pid = fork();
    if (child_pid < 0) {
        /* Could not create a worker: drop this client and keep serving. */
        perror("Error forking");
        close(client_socket_number);
        continue;
    }

    if (child_pid == 0) {
        /* Child: ask the kernel to send SIGTERM if the parent dies. */
        int r = prctl(PR_SET_PDEATHSIG, SIGTERM);

        /*
         * Exit with code 1 when prctl failed, or when the parent already
         * died between fork() and prctl() (getppid() no longer matches).
         * Checked before doing any work on the request.
         */
        if (r == -1 || getppid() != parent_pid) {
            perror(0);
            exit(1);
        }

        dispatch(client_socket_number);
        exit(EXIT_SUCCESS);
    }

    /*
     * Parent: the child owns its own copy of the descriptor, so release ours;
     * otherwise one descriptor leaks per accepted connection.
     */
    close(client_socket_number);
#else
    dispatch(client_socket_number);
#endif
}

cs61c_lab9

Exercise 1 - Familiarize Yourself with the SIMD Functions

  1. __m128 _mm_div_ps (__m128 a, __m128 b)

  2. __m128i _mm_max_epi8 (__m128i a, __m128i b)

  3. __m128i _mm_sra_epi16 (__m128i a, __m128i count)

Exercise 2 - Writing SIMD Code

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Adds up every element of vals that is greater than 127, repeating the whole
 * pass OUTER_ITERATIONS times and accumulating all passes into one total.
 * The elapsed CPU time is printed before returning.
 *
 * Four elements are processed per step with SSE2 integer intrinsics; the
 * elements left over when NUM_ELEMS is not a multiple of 4 are handled by a
 * scalar loop.
 *
 * NOTE(review): the vector path uses a signed 32-bit compare while the scalar
 * tail uses an unsigned one, so the two agree only for elements below 2^31,
 * and the per-lane 32-bit sums must not overflow — confirm the input range.
 */
long long int sum_simd(unsigned int vals[NUM_ELEMS]) {
    clock_t start = clock();
    __m128i _127 = _mm_set1_epi32(127); // Threshold 127 replicated in all four 32-bit lanes.
    long long int result = 0; // Grand total over every outer iteration.
    /* DO NOT DO NOT DO NOT DO NOT WRITE ANYTHING ABOVE THIS LINE. */

    for (unsigned int pass = 0; pass < OUTER_ITERATIONS; pass++) {
        /* Four independent 32-bit partial sums, one per lane. */
        __m128i lane_sums = _mm_setzero_si128();

        unsigned int pos = 0;
        while (pos < NUM_ELEMS / 4 * 4) {
            __m128i chunk = _mm_loadu_si128((__m128i *) &vals[pos]);
            /* All-ones in lanes whose value exceeds 127, zero elsewhere. */
            __m128i keep = _mm_cmpgt_epi32(chunk, _127);
            /* Zero out the rejected lanes so that adding them is a no-op. */
            __m128i kept = _mm_and_si128(chunk, keep);
            lane_sums = _mm_add_epi32(lane_sums, kept);
            pos += 4;
        }

        /* Spill the lanes to memory and fold them into the 64-bit total. */
        unsigned int lanes[4] = {0, 0, 0, 0};
        _mm_storeu_si128((__m128i *) lanes, lane_sums);
        for (int lane = 0; lane < 4; lane++) {
            result += lanes[lane];
        }

        /* Scalar tail for the last NUM_ELEMS % 4 elements. */
        for (int rest = NUM_ELEMS / 4 * 4; rest < NUM_ELEMS; rest++) {
            if (vals[rest] > 127) {
                result += vals[rest];
            }
        }
    }
    clock_t end = clock();
    printf("Time taken: %Lf s\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return result;
}

Exercise 3 - Loop Unrolling

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
 * Same computation as the 4-wide SIMD sum (total of all elements of vals
 * greater than 127, repeated OUTER_ITERATIONS times, elapsed CPU time
 * printed), with the vector loop unrolled four times so that each trip
 * consumes 16 elements.
 *
 * Elements left over when NUM_ELEMS is not a multiple of 16 are handled by a
 * scalar loop.
 *
 * NOTE(review): the vector path uses a signed 32-bit compare while the scalar
 * tail uses an unsigned one, so the two agree only for elements below 2^31,
 * and the per-lane 32-bit sums must not overflow — confirm the input range.
 */
long long int sum_simd_unrolled(unsigned int vals[NUM_ELEMS]) {
    clock_t start = clock();
    __m128i _127 = _mm_set1_epi32(127); // Threshold 127 replicated in all four 32-bit lanes.
    long long int result = 0;
    for (unsigned int pass = 0; pass < OUTER_ITERATIONS; pass++) {
        /* Four independent 32-bit partial sums, one per lane. */
        __m128i lane_sums = _mm_setzero_si128();

        unsigned int pos = 0;
        while (pos < NUM_ELEMS / 16 * 16) {
            /* Load the four 4-element groups of this 16-element trip. */
            __m128i g0 = _mm_loadu_si128((__m128i *) &vals[pos]);
            __m128i g1 = _mm_loadu_si128((__m128i *) &vals[pos + 4]);
            __m128i g2 = _mm_loadu_si128((__m128i *) &vals[pos + 8]);
            __m128i g3 = _mm_loadu_si128((__m128i *) &vals[pos + 12]);

            /* Keep only lanes greater than 127; rejected lanes become 0. */
            g0 = _mm_and_si128(g0, _mm_cmpgt_epi32(g0, _127));
            g1 = _mm_and_si128(g1, _mm_cmpgt_epi32(g1, _127));
            g2 = _mm_and_si128(g2, _mm_cmpgt_epi32(g2, _127));
            g3 = _mm_and_si128(g3, _mm_cmpgt_epi32(g3, _127));

            lane_sums = _mm_add_epi32(lane_sums, g0);
            lane_sums = _mm_add_epi32(lane_sums, g1);
            lane_sums = _mm_add_epi32(lane_sums, g2);
            lane_sums = _mm_add_epi32(lane_sums, g3);

            pos += 16;
        }

        /* Spill the lanes to memory and fold them into the 64-bit total. */
        unsigned int lanes[4] = {0, 0, 0, 0};
        _mm_storeu_si128((__m128i *) lanes, lane_sums);
        for (int lane = 0; lane < 4; lane++) {
            result += lanes[lane];
        }

        /* Scalar tail for the last NUM_ELEMS % 16 elements. */
        for (int rest = NUM_ELEMS / 16 * 16; rest < NUM_ELEMS; rest++) {
            if (vals[rest] > 127) {
                result += vals[rest];
            }
        }
    }
    clock_t end = clock();
    printf("Time taken: %Lf s\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return result;
}

cs61c_lab8

Exercise 1 - Working with CAMERA 地址序列为 4D, 1E, 7F, 4C, 8E, CF, 6D, D8, 3B, E2

  1. 32字节。
  2. TLB Hits: 3, TLB Misses: 7. Page Hits 0, Page Faults 7.
  3. 没有Page Hits,也不会有Page Hits,因为实验环境中frame的数量和TLB中页面的数量是一样的。
  4. 首先访问TLB,TLB没有命中。接着访问页表,页表中有效位为0,物理内存中没有该帧,引发缺页错误,从磁盘中将该帧载入物理内存,然后将物理地址写入页表和TLB。
Read more »

cs61c_lab7

Exercise 1:

Scenario 1: 1. Because (Step Size) * 4 bytes (32 bytes) is exactly equal to the cache size in bytes. 2. The hit rate will always be zero, because the index portion is always 00, so every address we access maps to the first set. However, the tag portion is always different, so the first cache line is evicted on every access. 3. Change Step Size to 1.

Read more »

cs61c_lab6

Exercise 1 - Inefficiencies Everywhere

1. Max Delay

\[ \begin{aligned} Max\_Delay &= CLK\_to\_Q\_Delay + CL\_Delay + Setup\_Time \\ &= CLK\_to\_Q\_Delay + (Adder\_Delay + Multiplication\_Delay) + Setup\_Time \\ &= 10ns + (45ns + 60ns) + 10ns \\ &= 125ns \end{aligned} \]

Read more »

cs61c_lab5

Part 1: Sub-Circuits

NAND1

NOR1

XOR1

MUX2

通过列真值表可以得到以下逻辑式:

\(\neg{A}BSel+A\neg{B}\neg{Sel}+AB\neg{Sel}+ABSel=RESULT\)

可以画出如下电路:

Read more »

cs61c_lab4

Exercise 1: Debugging megalistmanips.s

错误之处:

  1. 偏移量没有乘4。
  2. 偏移量应该加在结构体中数组的地址上而不是数组的地址的地址上。
  3. 调用函数之前没有保存s1。
  4. 在内存中取字应该是lw,而不是la。
  5. 将s1中的内容移入a1,而不是将s1指向的地址中的内容移入a1。

代码如下:

Read more »

cs61c_lab3

Exercise 1: Familiarizing yourself with Venus

  1. .data 指令用于指定程序中数据段的开始。在这个部分中,可以定义程序将使用的变量和数据。.word 指令用于在 .data 部分内分配空间并初始化数据值。.text 指令用于指定程序的代码或文本段的开始。
  2. 最后输出结果是34,表示斐波那契数列的第九项。
  3. n的地址是0x10000008
  4. 在运行过程中把t3改为0x0000000D

题外话:我网上查到说ecall的调用号应该在a7,但是这个代码里的调用号不知道为什么在a0,可能是cs61c自己开发的这个Venus模拟器的问题。

Exercise 2: Translating from C to RISC-V

Read more »

cs61c_lab2

Exercise 0: Makefiles

看makefile文件回答问题。

我的回答如下:

  1. clean
  2. all
  3. gcc
  4. c99
  5. 可以使用$(FOO)引用FOO变量
  6. Darwin是macos的内核
  7. 第31行从目标文件创建了lfsr程序

Exercise 1: Bit Operations

我的代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <stdio.h>
#include "bit_ops.h"

/*
 * Returns bit n of x (bit 0 is the least significant bit) as 0 or 1.
 * n must be smaller than the width of unsigned.
 */
unsigned get_bit(unsigned x, unsigned n) {
    const unsigned shifted = x >> n;
    return shifted & 1u;
}
/*
 * Sets bit n of *x (bit 0 is the least significant bit) to the low bit of v,
 * leaving every other bit unchanged.
 *
 * x: value to modify in place.
 * n: bit position; must be smaller than the width of unsigned.
 * v: new bit value. Only its least significant bit is used, so a value other
 *    than 0 or 1 cannot disturb the bits above position n.
 */
void set_bit(unsigned * x,
             unsigned n,
             unsigned v) {
    /* 1u keeps the shift unsigned: 1 << 31 on a signed int is undefined. */
    const unsigned mask = 1u << n;
    *x = (*x & ~mask) | ((v & 1u) << n);
}
/*
 * Inverts bit n of *x (bit 0 is the least significant bit), leaving every
 * other bit unchanged.
 *
 * x: value to modify in place.
 * n: bit position; must be smaller than the width of unsigned.
 */
void flip_bit(unsigned * x,
              unsigned n) {
    /* 1u keeps the shift unsigned: 1 << 31 on a signed int is undefined. */
    *x ^= 1u << n;
}
Read more »